[llvm] [ISel] Introduce llvm.clmul intrinsic (PR #168731)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 20 05:57:33 PST 2025


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/168731

From 9e143aade13bbf4b4eaa1c9a09d2563376222739 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 19 Nov 2025 15:43:42 +0000
Subject: [PATCH 1/5] [ISel] Introduce llvm.clmul[rh] intrinsics

In line with a C++ standard proposal, introduce the llvm.clmul[rh]
family of intrinsics corresponding to carry-less multiply operations.
This work builds upon 727ee7e ([APInt] Introduce carry-less multiply
primitives), and follow-up patches will introduce custom lowering on
supported targets, replacing the target-specific clmul intrinsics.

Testing is done on the RISC-V target; since no RISC-V-specific lowering
has been added, the tests exercise the generic expansion and should be
sufficient to show that the intrinsics work.

Ref: https://isocpp.org/files/papers/P3642R3.html

Co-authored-by: Oscar Smith <oscardssmith at gmail.com>
---
 llvm/docs/LangRef.rst                         |   101 +-
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |     5 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |     5 +
 llvm/include/llvm/IR/Intrinsics.td            |     4 +
 .../include/llvm/Target/TargetSelectionDAG.td |     4 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |     6 +
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |    14 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |     1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |    12 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |     6 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |    16 +
 .../SelectionDAG/SelectionDAGDumper.cpp       |     3 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |    43 +
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |     3 +
 llvm/test/CodeGen/RISCV/clmul.ll              |  7582 +++++
 llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll   | 24188 ++++++++++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-clmul.ll  | 19366 +++++++++++++
 17 files changed, 51357 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/clmul.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 734778f73af5f..a33e2bdceafb8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18291,8 +18291,6 @@ then the result is the size in bits of the type of ``src`` if
 ``is_zero_poison == 0`` and ``poison`` otherwise. For example,
 ``llvm.cttz(2) = 1``.
 
-.. _int_overflow:
-
 .. _int_fshl:
 
 '``llvm.fshl.*``' Intrinsic
@@ -18389,6 +18387,105 @@ Example:
       %r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)  ; %r = i8: 225 (0b11100001)
       %r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)   ; %r = i8: 255 (0b11111111)
 
+.. _int_clmul:
+
+'``llvm.clmul.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.clmul`` on any integer
+bit width or any vector of integer elements.
+
+::
+
+      declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
+      declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
+      declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
+      declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview:
+"""""""""
+
+The '``llvm.clmul``' family of intrinsic functions performs carry-less
+multiplication, also known as XOR multiplication, on the two arguments, and
+returns the low bits of the result.
+
+Arguments:
+""""""""""
+
+The arguments may be any integer type or a vector of integer elements. Both
+arguments and the result must have the same type.
+
+Semantics:
+""""""""""
+
+The '``llvm.clmul``' intrinsic computes the carry-less multiply of its
+arguments, which is the result of applying the standard multiplication
+algorithm with every addition replaced by an XOR, returning the low bits of
+the result. The vector variants operate lane-wise.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %r = call i4 @llvm.clmul.i4(i4 1, i4 2)    ; %r = 2
+      %r = call i4 @llvm.clmul.i4(i4 5, i4 6)    ; %r = 14
+      %r = call i4 @llvm.clmul.i4(i4 -4, i4 2)   ; %r = -8
+      %r = call i4 @llvm.clmul.i4(i4 -4, i4 -5)  ; %r = 4
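+
+As an illustration only, a minimal self-contained C++ reference model of
+these semantics for a 64-bit operand width (the helper ``clmul_ref`` is
+illustrative, not an LLVM API) is:
+
+.. code-block:: c++
+
+      #include <cstdint>
+
+      // Carry-less multiply: long multiplication with the additions of the
+      // partial products replaced by XOR, keeping only the low 64 bits.
+      uint64_t clmul_ref(uint64_t a, uint64_t b) {
+        uint64_t res = 0;
+        for (unsigned i = 0; i < 64; ++i)
+          if ((b >> i) & 1) // each set bit of b contributes a shifted copy of a
+            res ^= a << i;  // XOR instead of add: no carries propagate
+        return res;
+      }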
+
+'``llvm.clmulr.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.clmulr`` on any integer
+bit width or any vector of integer elements.
+
+::
+
+      declare i16 @llvm.clmulr.i16(i16 %a, i16 %b)
+      declare i32 @llvm.clmulr.i32(i32 %a, i32 %b)
+      declare i64 @llvm.clmulr.i64(i64 %a, i64 %b)
+      declare <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview:
+"""""""""
+
+The '``llvm.clmulr``' family of intrinsic functions performs reversed
+carry-less multiplication on the two arguments.
+
+Arguments:
+""""""""""
+
+The arguments may be any integer type or a vector of integer elements. Both
+arguments and the result must have the same type.
+
+Semantics:
+""""""""""
+
+The '``llvm.clmulr``' intrinsic computes the reversed carry-less multiply of
+its arguments, defined by the identity below. The vector variants operate
+lane-wise.
+
+.. code-block:: text
+
+      clmulr(%a, %b) = bitreverse(clmul(bitreverse(%a), bitreverse(%b)))
+
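+In terms of the ``clmul_ref`` model in the ``llvm.clmul`` section above, this
+identity can be written as the following illustrative C++ sketch
+(``bitreverse64`` and ``clmulr_ref`` are hypothetical helpers, not LLVM APIs):
+
+.. code-block:: c++
+
+      // Reverse the order of the 64 bits of x.
+      uint64_t bitreverse64(uint64_t x) {
+        uint64_t r = 0;
+        for (unsigned i = 0; i < 64; ++i)
+          r |= ((x >> i) & 1) << (63 - i);
+        return r;
+      }
+
+      // Reversed carry-less multiply, per the identity above.
+      uint64_t clmulr_ref(uint64_t a, uint64_t b) {
+        return bitreverse64(clmul_ref(bitreverse64(a), bitreverse64(b)));
+      }
+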
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %r = call i4 @llvm.clmulr.i4(i4 1, i4 2)    ; %r = 0
+      %r = call i4 @llvm.clmulr.i4(i4 5, i4 6)    ; %r = 3
+      %r = call i4 @llvm.clmulr.i4(i4 -4, i4 2)   ; %r = 3
+      %r = call i4 @llvm.clmulr.i4(i4 -4, i4 -5)  ; %r = -2
+
+.. _int_overflow:
+
 Arithmetic with Overflow Intrinsics
 -----------------------------------
 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index cdaa916548c25..08d87f7e7b266 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -767,6 +767,11 @@ enum NodeType {
   FSHL,
   FSHR,
 
+  /// Carry-less multiplication operations.
+  CLMUL,
+  CLMULR,
+  CLMULH,
+
   /// Byte Swap and Counting operators.
   BSWAP,
   CTTZ,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7df5d8a09f0f6..4c904cffcafaa 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5457,6 +5457,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \returns The expansion if successful, SDValue() otherwise
   SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;
 
+  /// Expand carryless multiply.
+  /// \param N Node to expand
+  /// \returns The expansion if successful, SDValue() otherwise
+  SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const;
+
   /// Expand rotations.
   /// \param N Node to expand
   /// \param AllowVectorOps expand vector rotate, this should only be performed
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 8f3cc54747074..fb8857cec2075 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1465,6 +1465,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in
       [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
   def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
       [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+  def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
+      [LLVMMatchType<0>, LLVMMatchType<0>]>;
+  def int_clmulr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
+      [LLVMMatchType<0>, LLVMMatchType<0>]>;
 }
 
 let IntrProperties = [IntrNoMem, IntrSpeculatable,
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index a9750a5ab03f9..6c5024845dc6d 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -441,6 +441,10 @@ def sra_parts  : SDNode<"ISD::SRA_PARTS" , SDTIntShiftPairOp>;
 def srl_parts  : SDNode<"ISD::SRL_PARTS" , SDTIntShiftPairOp>;
 def fshl       : SDNode<"ISD::FSHL"      , SDTIntShiftDOp>;
 def fshr       : SDNode<"ISD::FSHR"      , SDTIntShiftDOp>;
+def clmul      : SDNode<"ISD::CLMUL"     , SDTIntBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def clmulr     : SDNode<"ISD::CLMULR"    , SDTIntBinOp, [SDNPCommutative]>;
+def clmulh     : SDNode<"ISD::CLMULH"    , SDTIntBinOp, [SDNPCommutative]>;
 def and        : SDNode<"ISD::AND"       , SDTIntBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
 def or         : SDNode<"ISD::OR"        , SDTIntBinOp,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 99d14a60c6ed1..4e9cbbb85c129 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4095,6 +4095,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
       Results.push_back(Expanded);
     break;
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
+    if (SDValue Expanded = TLI.expandCLMUL(Node, DAG))
+      Results.push_back(Expanded);
+    break;
   case ISD::ROTL:
   case ISD::ROTR:
     if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 44e5a187c4281..ec3327c85f248 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -204,6 +204,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::ADD:
   case ISD::SUB:
   case ISD::MUL:
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
   case ISD::VP_AND:
   case ISD::VP_OR:
   case ISD::VP_XOR:
@@ -3162,6 +3165,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
     ExpandIntRes_FunnelShift(N, Lo, Hi);
     break;
 
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
+    ExpandIntRes_CLMUL(N, Lo, Hi);
+    break;
+
   case ISD::VSCALE:
     ExpandIntRes_VSCALE(N, Lo, Hi);
     break;
@@ -5492,6 +5501,11 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDValue Res = TLI.expandCLMUL(N, DAG);
+  SplitInteger(Res, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
   EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ede522eff6df3..ee9f519e249ed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -513,6 +513,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
 
   void ExpandIntRes_Rotate            (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_FunnelShift       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandIntRes_VSCALE            (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 10d5f7a9b4f65..7f077940cb2a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -173,6 +173,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SMAX:
   case ISD::UMIN:
   case ISD::UMAX:
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
 
   case ISD::SADDSAT:
   case ISD::UADDSAT:
@@ -1372,6 +1375,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::ADD: case ISD::VP_ADD:
   case ISD::SUB: case ISD::VP_SUB:
   case ISD::MUL: case ISD::VP_MUL:
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
   case ISD::MULHS:
   case ISD::MULHU:
   case ISD::ABDS:
@@ -4924,6 +4930,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SHL: case ISD::VP_SHL:
   case ISD::SRA: case ISD::VP_SRA:
   case ISD::SRL: case ISD::VP_SRL:
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
   case ISD::FMINNUM:
   case ISD::FMINNUM_IEEE:
   case ISD::VP_FMINNUM:
@@ -7064,6 +7073,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::LLROUND:
   case ISD::LRINT:
   case ISD::LLRINT:
+  case ISD::CLMUL:
+  case ISD::CLMULR:
+  case ISD::CLMULH:
     Res = WidenVecOp_UnrollVectorOp(N);
     break;
   case ISD::IS_FPCLASS:         Res = WidenVecOp_IS_FPCLASS(N); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1b15a207a2d37..1e2c7c13a4457 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6789,6 +6789,12 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
     return APIntOps::mulhs(C1, C2);
   case ISD::MULHU:
     return APIntOps::mulhu(C1, C2);
+  case ISD::CLMUL:
+    return APIntOps::clmul(C1, C2);
+  case ISD::CLMULR:
+    return APIntOps::clmulr(C1, C2);
+  case ISD::CLMULH:
+    return APIntOps::clmulh(C1, C2);
   }
   return std::nullopt;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 985a54ca83256..428eaeb3a1dde 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7279,6 +7279,22 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     }
     return;
   }
+  case Intrinsic::clmul:
+  case Intrinsic::clmulr: {
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    unsigned Opcode;
+    switch (Intrinsic) {
+    default:
+      llvm_unreachable("Unexpected intrinsic");
+    case Intrinsic::clmul:
+      Opcode = ISD::CLMUL;
+      break;
+    case Intrinsic::clmulr:
+      Opcode = ISD::CLMULR;
+      break;
+    }
+    setValue(&I, DAG.getNode(Opcode, sdl, Op1.getValueType(), Op1, Op2));
+    return;
+  }
   case Intrinsic::sadd_sat: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index ec5edd5f13978..d537236afb41c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -299,6 +299,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ROTR:                       return "rotr";
   case ISD::FSHL:                       return "fshl";
   case ISD::FSHR:                       return "fshr";
+  case ISD::CLMUL:                      return "clmul";
+  case ISD::CLMULR:                     return "clmulr";
+  case ISD::CLMULH:                     return "clmulh";
   case ISD::FADD:                       return "fadd";
   case ISD::STRICT_FADD:                return "strict_fadd";
   case ISD::FSUB:                       return "fsub";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5684e0e4c26c4..7db1dad5b4426 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8302,6 +8302,49 @@ SDValue TargetLowering::expandFunnelShift(SDNode *Node,
   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
 }
 
+SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+  SDValue X = Node->getOperand(0);
+  SDValue Y = Node->getOperand(1);
+  unsigned BW = VT.getScalarSizeInBits();
+
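+  // If the element type supports the operation, scalarize vectors rather
+  // than expanding each lane with the generic sequences below.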
+  if (VT.isVector() && isOperationLegalOrCustomOrPromote(
+                           Node->getOpcode(), VT.getVectorElementType()))
+    return DAG.UnrollVectorOp(Node);
+
+  SDValue Res = DAG.getConstant(0, DL, VT);
+  switch (Node->getOpcode()) {
+  case ISD::CLMUL: {
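+    // MUL by the single set bit of YMasked is just a left shift of X, so
+    // XOR-accumulating these partial products yields the carry-less product.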
+    for (unsigned I = 0; I < BW; ++I) {
+      SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
+      SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
+      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
+      Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul);
+    }
+    break;
+  }
+  case ISD::CLMULR: {
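+    // clmulr(X, Y) == bitreverse(clmul(bitreverse(X), bitreverse(Y))).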
+    SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
+    SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
+    SDValue ResR = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
+    Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ResR);
+    break;
+  }
+  case ISD::CLMULH: {
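+    // Zero-extend to twice the width, form the full carry-less product, and
+    // return its high BW bits.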
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW);
+    SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
+    SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
+    SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
+    SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
+                                 DAG.getShiftAmountConstant(BW, VT, DL));
+    Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
+    break;
+  }
+  }
+  return Res;
+}
+
 // TODO: Merge with expandFunnelShift.
 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                   SelectionDAG &DAG) const {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f9d727eaf1e20..9fd05236418b1 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -913,6 +913,9 @@ void TargetLoweringBase::initActions() {
     // Absolute difference
     setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
 
+    // Carry-less multiply
+    setOperationAction({ISD::CLMUL, ISD::CLMULR, ISD::CLMULH}, VT, Expand);
+
     // Saturated trunc
     setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
     setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
new file mode 100644
index 0000000000000..1e3acd8ccce74
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -0,0 +1,7582 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32IM
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64IM
+
+define i4 @clmul_i4(i4 %a, i4 %b) nounwind {
+; RV32IM-LABEL: clmul_i4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -48
+; RV32IM-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t6, a1, 2
+; RV32IM-NEXT:    andi s1, a1, 1
+; RV32IM-NEXT:    andi a7, a1, 4
+; RV32IM-NEXT:    andi t2, a1, 8
+; RV32IM-NEXT:    andi t0, a1, 16
+; RV32IM-NEXT:    andi t3, a1, 32
+; RV32IM-NEXT:    andi a2, a1, 64
+; RV32IM-NEXT:    andi t4, a1, 128
+; RV32IM-NEXT:    andi s0, a1, 256
+; RV32IM-NEXT:    andi a3, a1, 512
+; RV32IM-NEXT:    li a4, 1
+; RV32IM-NEXT:    lui a5, 1
+; RV32IM-NEXT:    lui a6, 2
+; RV32IM-NEXT:    lui t1, 4
+; RV32IM-NEXT:    lui t5, 8
+; RV32IM-NEXT:    lui s2, 16
+; RV32IM-NEXT:    lui s3, 32
+; RV32IM-NEXT:    lui s4, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    xor t6, s1, t6
+; RV32IM-NEXT:    lui s1, 8192
+; RV32IM-NEXT:    mul a7, a0, a7
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    xor a7, a7, t2
+; RV32IM-NEXT:    lui t2, 16384
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    xor t0, t0, t3
+; RV32IM-NEXT:    lui t3, 32768
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    xor t4, t4, s0
+; RV32IM-NEXT:    lui s0, 65536
+; RV32IM-NEXT:    xor a7, t6, a7
+; RV32IM-NEXT:    lui t6, 131072
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    xor a2, t0, a2
+; RV32IM-NEXT:    lui t0, 262144
+; RV32IM-NEXT:    mul a3, a0, a3
+; RV32IM-NEXT:    xor a3, t4, a3
+; RV32IM-NEXT:    lui t4, 524288
+; RV32IM-NEXT:    slli a4, a4, 11
+; RV32IM-NEXT:    and a5, a1, a5
+; RV32IM-NEXT:    and a6, a1, a6
+; RV32IM-NEXT:    and t1, a1, t1
+; RV32IM-NEXT:    and t5, a1, t5
+; RV32IM-NEXT:    and s2, a1, s2
+; RV32IM-NEXT:    and s3, a1, s3
+; RV32IM-NEXT:    and s4, a1, s4
+; RV32IM-NEXT:    and s5, a1, s5
+; RV32IM-NEXT:    and s6, a1, s6
+; RV32IM-NEXT:    and s7, a1, s7
+; RV32IM-NEXT:    and s8, a1, s8
+; RV32IM-NEXT:    and s9, a1, s9
+; RV32IM-NEXT:    and s10, a1, s10
+; RV32IM-NEXT:    and s1, a1, s1
+; RV32IM-NEXT:    and t2, a1, t2
+; RV32IM-NEXT:    and t3, a1, t3
+; RV32IM-NEXT:    and s0, a1, s0
+; RV32IM-NEXT:    and t6, a1, t6
+; RV32IM-NEXT:    and t0, a1, t0
+; RV32IM-NEXT:    and t4, a1, t4
+; RV32IM-NEXT:    and a4, a1, a4
+; RV32IM-NEXT:    andi a1, a1, 1024
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    mul a5, a0, a5
+; RV32IM-NEXT:    mul a6, a0, a6
+; RV32IM-NEXT:    mul t1, a0, t1
+; RV32IM-NEXT:    mul t5, a0, t5
+; RV32IM-NEXT:    mul s2, a0, s2
+; RV32IM-NEXT:    mul s3, a0, s3
+; RV32IM-NEXT:    mul s4, a0, s4
+; RV32IM-NEXT:    mul s5, a0, s5
+; RV32IM-NEXT:    mul s6, a0, s6
+; RV32IM-NEXT:    mul s7, a0, s7
+; RV32IM-NEXT:    mul s8, a0, s8
+; RV32IM-NEXT:    mul s9, a0, s9
+; RV32IM-NEXT:    mul s10, a0, s10
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul a0, a0, a4
+; RV32IM-NEXT:    xor a4, t1, t5
+; RV32IM-NEXT:    xor t1, s5, s6
+; RV32IM-NEXT:    xor t2, s1, t2
+; RV32IM-NEXT:    xor a2, a7, a2
+; RV32IM-NEXT:    xor a1, a3, a1
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a3, a4, s2
+; RV32IM-NEXT:    xor a4, t1, s7
+; RV32IM-NEXT:    xor a5, t2, t3
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    xor a2, a3, s3
+; RV32IM-NEXT:    xor a3, a4, s8
+; RV32IM-NEXT:    xor a5, a5, s0
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, a2, s4
+; RV32IM-NEXT:    xor a2, a3, s9
+; RV32IM-NEXT:    xor a3, a5, t6
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, s10
+; RV32IM-NEXT:    xor a2, a3, t0
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, t4
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 48
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmul_i4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi t2, a1, 2
+; RV64IM-NEXT:    andi t4, a1, 1
+; RV64IM-NEXT:    andi a6, a1, 4
+; RV64IM-NEXT:    andi t0, a1, 8
+; RV64IM-NEXT:    andi a5, a1, 16
+; RV64IM-NEXT:    andi a7, a1, 32
+; RV64IM-NEXT:    andi a3, a1, 64
+; RV64IM-NEXT:    andi t1, a1, 128
+; RV64IM-NEXT:    andi t3, a1, 256
+; RV64IM-NEXT:    andi a4, a1, 512
+; RV64IM-NEXT:    li a2, 1
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    lui t6, 2
+; RV64IM-NEXT:    lui s0, 4
+; RV64IM-NEXT:    lui s1, 8
+; RV64IM-NEXT:    lui s2, 16
+; RV64IM-NEXT:    lui s3, 32
+; RV64IM-NEXT:    lui s4, 64
+; RV64IM-NEXT:    lui s5, 128
+; RV64IM-NEXT:    lui s6, 256
+; RV64IM-NEXT:    lui s8, 512
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    lui s10, 2048
+; RV64IM-NEXT:    lui s11, 4096
+; RV64IM-NEXT:    lui ra, 8192
+; RV64IM-NEXT:    lui t5, 16384
+; RV64IM-NEXT:    mul t2, a0, t2
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    xor t2, t4, t2
+; RV64IM-NEXT:    lui t4, 32768
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    xor a6, a6, t0
+; RV64IM-NEXT:    lui t0, 65536
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    mul a7, a0, a7
+; RV64IM-NEXT:    xor a5, a5, a7
+; RV64IM-NEXT:    lui a7, 131072
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    xor t1, t1, t3
+; RV64IM-NEXT:    lui t3, 262144
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a6, t2, a6
+; RV64IM-NEXT:    sd a6, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a6, a2, 11
+; RV64IM-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s7, a1, s7
+; RV64IM-NEXT:    and a6, a1, t6
+; RV64IM-NEXT:    sd a6, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    and s2, a1, s2
+; RV64IM-NEXT:    and s3, a1, s3
+; RV64IM-NEXT:    and a6, a1, s4
+; RV64IM-NEXT:    sd a6, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, s5
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    and s8, a1, s8
+; RV64IM-NEXT:    and t6, a1, s9
+; RV64IM-NEXT:    sd t6, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s10
+; RV64IM-NEXT:    sd t6, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s11
+; RV64IM-NEXT:    sd t6, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    and t5, a1, t5
+; RV64IM-NEXT:    and t4, a1, t4
+; RV64IM-NEXT:    sd t4, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, t3
+; RV64IM-NEXT:    sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    sd a3, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a2, 32
+; RV64IM-NEXT:    xor a3, t1, a4
+; RV64IM-NEXT:    sd a3, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, a2, 33
+; RV64IM-NEXT:    mul a3, a0, s0
+; RV64IM-NEXT:    mul a4, a0, s1
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s0, a2, 34
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a4, a0, t2
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s1, a2, 35
+; RV64IM-NEXT:    mul a3, a0, t6
+; RV64IM-NEXT:    mul a4, a0, t5
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, a2, 36
+; RV64IM-NEXT:    slli t6, a2, 37
+; RV64IM-NEXT:    slli s5, a2, 38
+; RV64IM-NEXT:    slli s6, a2, 39
+; RV64IM-NEXT:    slli s9, a2, 40
+; RV64IM-NEXT:    slli s10, a2, 41
+; RV64IM-NEXT:    slli s11, a2, 42
+; RV64IM-NEXT:    slli ra, a2, 43
+; RV64IM-NEXT:    slli a3, a2, 44
+; RV64IM-NEXT:    sd a3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 45
+; RV64IM-NEXT:    sd a3, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 46
+; RV64IM-NEXT:    sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 47
+; RV64IM-NEXT:    sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 48
+; RV64IM-NEXT:    sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 49
+; RV64IM-NEXT:    sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 50
+; RV64IM-NEXT:    sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 51
+; RV64IM-NEXT:    sd a3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 52
+; RV64IM-NEXT:    sd a3, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 53
+; RV64IM-NEXT:    sd a3, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 54
+; RV64IM-NEXT:    sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, a2, 55
+; RV64IM-NEXT:    slli t0, a2, 56
+; RV64IM-NEXT:    slli a7, a2, 57
+; RV64IM-NEXT:    slli a6, a2, 58
+; RV64IM-NEXT:    slli a5, a2, 59
+; RV64IM-NEXT:    slli a4, a2, 60
+; RV64IM-NEXT:    slli a3, a2, 61
+; RV64IM-NEXT:    slli a2, a2, 62
+; RV64IM-NEXT:    ld t2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t3, a1, t2
+; RV64IM-NEXT:    and t2, a1, t4
+; RV64IM-NEXT:    sd t2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t4, a1, s4
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, t5
+; RV64IM-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, t6
+; RV64IM-NEXT:    and t2, a1, s5
+; RV64IM-NEXT:    sd t2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    sd t2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s4, a1, s9
+; RV64IM-NEXT:    and s5, a1, s10
+; RV64IM-NEXT:    and s6, a1, s11
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, a1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s11, a1, t2
+; RV64IM-NEXT:    ld t2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and ra, a1, t2
+; RV64IM-NEXT:    ld t2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, a1, t1
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, a6
+; RV64IM-NEXT:    and a5, a1, a5
+; RV64IM-NEXT:    sd a5, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a4, a1, a4
+; RV64IM-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a1, a3
+; RV64IM-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a2, a1, a2
+; RV64IM-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, a1, 1024
+; RV64IM-NEXT:    srliw a3, a1, 31
+; RV64IM-NEXT:    srli a1, a1, 63
+; RV64IM-NEXT:    mul s9, a0, a2
+; RV64IM-NEXT:    slli a3, a3, 31
+; RV64IM-NEXT:    slli a1, a1, 63
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul a2, a0, s3
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, s8
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a2
+; RV64IM-NEXT:    ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a5, a0, t3
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s10
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s10, a0, s11
+; RV64IM-NEXT:    mul s11, a0, ra
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t3, a0, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    ld a4, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    ld a6, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    ld t1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    ld t6, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, s8
+; RV64IM-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s9, s8, s9
+; RV64IM-NEXT:    xor a5, a5, s7
+; RV64IM-NEXT:    ld s7, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, s7, s2
+; RV64IM-NEXT:    ld s7, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s7, s0
+; RV64IM-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s7, t2
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    xor t4, s1, s4
+; RV64IM-NEXT:    xor s1, s10, s11
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    xor a0, a0, s9
+; RV64IM-NEXT:    ld a2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s2, a5
+; RV64IM-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, s2
+; RV64IM-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, s2
+; RV64IM-NEXT:    xor a7, a7, t5
+; RV64IM-NEXT:    xor t4, t4, s5
+; RV64IM-NEXT:    xor t5, s1, ra
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s0, a3
+; RV64IM-NEXT:    ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t2
+; RV64IM-NEXT:    xor t2, t4, s6
+; RV64IM-NEXT:    xor t0, t5, t0
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a5, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    xor a7, t0, t3
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a7, s3
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i4 @llvm.clmul.i4(i4 %a, i4 %b)
+  ret i4 %res
+}
+
+define i8 @clmul_i8(i8 %a, i8 %b) nounwind {
+; RV32IM-LABEL: clmul_i8:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -48
+; RV32IM-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t6, a1, 2
+; RV32IM-NEXT:    andi s1, a1, 1
+; RV32IM-NEXT:    andi a7, a1, 4
+; RV32IM-NEXT:    andi t2, a1, 8
+; RV32IM-NEXT:    andi t0, a1, 16
+; RV32IM-NEXT:    andi t3, a1, 32
+; RV32IM-NEXT:    andi a2, a1, 64
+; RV32IM-NEXT:    andi t4, a1, 128
+; RV32IM-NEXT:    andi s0, a1, 256
+; RV32IM-NEXT:    andi a3, a1, 512
+; RV32IM-NEXT:    li a4, 1
+; RV32IM-NEXT:    lui a5, 1
+; RV32IM-NEXT:    lui a6, 2
+; RV32IM-NEXT:    lui t1, 4
+; RV32IM-NEXT:    lui t5, 8
+; RV32IM-NEXT:    lui s2, 16
+; RV32IM-NEXT:    lui s3, 32
+; RV32IM-NEXT:    lui s4, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    xor t6, s1, t6
+; RV32IM-NEXT:    lui s1, 8192
+; RV32IM-NEXT:    mul a7, a0, a7
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    xor a7, a7, t2
+; RV32IM-NEXT:    lui t2, 16384
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    xor t0, t0, t3
+; RV32IM-NEXT:    lui t3, 32768
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    xor t4, t4, s0
+; RV32IM-NEXT:    lui s0, 65536
+; RV32IM-NEXT:    xor a7, t6, a7
+; RV32IM-NEXT:    lui t6, 131072
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    xor a2, t0, a2
+; RV32IM-NEXT:    lui t0, 262144
+; RV32IM-NEXT:    mul a3, a0, a3
+; RV32IM-NEXT:    xor a3, t4, a3
+; RV32IM-NEXT:    lui t4, 524288
+; RV32IM-NEXT:    slli a4, a4, 11
+; RV32IM-NEXT:    and a5, a1, a5
+; RV32IM-NEXT:    and a6, a1, a6
+; RV32IM-NEXT:    and t1, a1, t1
+; RV32IM-NEXT:    and t5, a1, t5
+; RV32IM-NEXT:    and s2, a1, s2
+; RV32IM-NEXT:    and s3, a1, s3
+; RV32IM-NEXT:    and s4, a1, s4
+; RV32IM-NEXT:    and s5, a1, s5
+; RV32IM-NEXT:    and s6, a1, s6
+; RV32IM-NEXT:    and s7, a1, s7
+; RV32IM-NEXT:    and s8, a1, s8
+; RV32IM-NEXT:    and s9, a1, s9
+; RV32IM-NEXT:    and s10, a1, s10
+; RV32IM-NEXT:    and s1, a1, s1
+; RV32IM-NEXT:    and t2, a1, t2
+; RV32IM-NEXT:    and t3, a1, t3
+; RV32IM-NEXT:    and s0, a1, s0
+; RV32IM-NEXT:    and t6, a1, t6
+; RV32IM-NEXT:    and t0, a1, t0
+; RV32IM-NEXT:    and t4, a1, t4
+; RV32IM-NEXT:    and a4, a1, a4
+; RV32IM-NEXT:    andi a1, a1, 1024
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    mul a5, a0, a5
+; RV32IM-NEXT:    mul a6, a0, a6
+; RV32IM-NEXT:    mul t1, a0, t1
+; RV32IM-NEXT:    mul t5, a0, t5
+; RV32IM-NEXT:    mul s2, a0, s2
+; RV32IM-NEXT:    mul s3, a0, s3
+; RV32IM-NEXT:    mul s4, a0, s4
+; RV32IM-NEXT:    mul s5, a0, s5
+; RV32IM-NEXT:    mul s6, a0, s6
+; RV32IM-NEXT:    mul s7, a0, s7
+; RV32IM-NEXT:    mul s8, a0, s8
+; RV32IM-NEXT:    mul s9, a0, s9
+; RV32IM-NEXT:    mul s10, a0, s10
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul a0, a0, a4
+; RV32IM-NEXT:    xor a4, t1, t5
+; RV32IM-NEXT:    xor t1, s5, s6
+; RV32IM-NEXT:    xor t2, s1, t2
+; RV32IM-NEXT:    xor a2, a7, a2
+; RV32IM-NEXT:    xor a1, a3, a1
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a3, a4, s2
+; RV32IM-NEXT:    xor a4, t1, s7
+; RV32IM-NEXT:    xor a5, t2, t3
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    xor a2, a3, s3
+; RV32IM-NEXT:    xor a3, a4, s8
+; RV32IM-NEXT:    xor a5, a5, s0
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, a2, s4
+; RV32IM-NEXT:    xor a2, a3, s9
+; RV32IM-NEXT:    xor a3, a5, t6
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, s10
+; RV32IM-NEXT:    xor a2, a3, t0
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, t4
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 48
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmul_i8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi t2, a1, 2
+; RV64IM-NEXT:    andi t4, a1, 1
+; RV64IM-NEXT:    andi a6, a1, 4
+; RV64IM-NEXT:    andi t0, a1, 8
+; RV64IM-NEXT:    andi a5, a1, 16
+; RV64IM-NEXT:    andi a7, a1, 32
+; RV64IM-NEXT:    andi a3, a1, 64
+; RV64IM-NEXT:    andi t1, a1, 128
+; RV64IM-NEXT:    andi t3, a1, 256
+; RV64IM-NEXT:    andi a4, a1, 512
+; RV64IM-NEXT:    li a2, 1
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    lui t6, 2
+; RV64IM-NEXT:    lui s0, 4
+; RV64IM-NEXT:    lui s1, 8
+; RV64IM-NEXT:    lui s2, 16
+; RV64IM-NEXT:    lui s3, 32
+; RV64IM-NEXT:    lui s4, 64
+; RV64IM-NEXT:    lui s5, 128
+; RV64IM-NEXT:    lui s6, 256
+; RV64IM-NEXT:    lui s8, 512
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    lui s10, 2048
+; RV64IM-NEXT:    lui s11, 4096
+; RV64IM-NEXT:    lui ra, 8192
+; RV64IM-NEXT:    lui t5, 16384
+; RV64IM-NEXT:    mul t2, a0, t2
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    xor t2, t4, t2
+; RV64IM-NEXT:    lui t4, 32768
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    xor a6, a6, t0
+; RV64IM-NEXT:    lui t0, 65536
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    mul a7, a0, a7
+; RV64IM-NEXT:    xor a5, a5, a7
+; RV64IM-NEXT:    lui a7, 131072
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    xor t1, t1, t3
+; RV64IM-NEXT:    lui t3, 262144
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a6, t2, a6
+; RV64IM-NEXT:    sd a6, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a6, a2, 11
+; RV64IM-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s7, a1, s7
+; RV64IM-NEXT:    and a6, a1, t6
+; RV64IM-NEXT:    sd a6, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    and s2, a1, s2
+; RV64IM-NEXT:    and s3, a1, s3
+; RV64IM-NEXT:    and a6, a1, s4
+; RV64IM-NEXT:    sd a6, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, s5
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    and s8, a1, s8
+; RV64IM-NEXT:    and t6, a1, s9
+; RV64IM-NEXT:    sd t6, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s10
+; RV64IM-NEXT:    sd t6, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s11
+; RV64IM-NEXT:    sd t6, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    and t5, a1, t5
+; RV64IM-NEXT:    and t4, a1, t4
+; RV64IM-NEXT:    sd t4, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, t3
+; RV64IM-NEXT:    sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    sd a3, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a2, 32
+; RV64IM-NEXT:    xor a3, t1, a4
+; RV64IM-NEXT:    sd a3, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, a2, 33
+; RV64IM-NEXT:    mul a3, a0, s0
+; RV64IM-NEXT:    mul a4, a0, s1
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s0, a2, 34
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a4, a0, t2
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s1, a2, 35
+; RV64IM-NEXT:    mul a3, a0, t6
+; RV64IM-NEXT:    mul a4, a0, t5
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, a2, 36
+; RV64IM-NEXT:    slli t6, a2, 37
+; RV64IM-NEXT:    slli s5, a2, 38
+; RV64IM-NEXT:    slli s6, a2, 39
+; RV64IM-NEXT:    slli s9, a2, 40
+; RV64IM-NEXT:    slli s10, a2, 41
+; RV64IM-NEXT:    slli s11, a2, 42
+; RV64IM-NEXT:    slli ra, a2, 43
+; RV64IM-NEXT:    slli a3, a2, 44
+; RV64IM-NEXT:    sd a3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 45
+; RV64IM-NEXT:    sd a3, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 46
+; RV64IM-NEXT:    sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 47
+; RV64IM-NEXT:    sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 48
+; RV64IM-NEXT:    sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 49
+; RV64IM-NEXT:    sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 50
+; RV64IM-NEXT:    sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 51
+; RV64IM-NEXT:    sd a3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 52
+; RV64IM-NEXT:    sd a3, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 53
+; RV64IM-NEXT:    sd a3, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 54
+; RV64IM-NEXT:    sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, a2, 55
+; RV64IM-NEXT:    slli t0, a2, 56
+; RV64IM-NEXT:    slli a7, a2, 57
+; RV64IM-NEXT:    slli a6, a2, 58
+; RV64IM-NEXT:    slli a5, a2, 59
+; RV64IM-NEXT:    slli a4, a2, 60
+; RV64IM-NEXT:    slli a3, a2, 61
+; RV64IM-NEXT:    slli a2, a2, 62
+; RV64IM-NEXT:    ld t2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t3, a1, t2
+; RV64IM-NEXT:    and t2, a1, t4
+; RV64IM-NEXT:    sd t2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t4, a1, s4
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, t5
+; RV64IM-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, t6
+; RV64IM-NEXT:    and t2, a1, s5
+; RV64IM-NEXT:    sd t2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    sd t2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s4, a1, s9
+; RV64IM-NEXT:    and s5, a1, s10
+; RV64IM-NEXT:    and s6, a1, s11
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, a1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s11, a1, t2
+; RV64IM-NEXT:    ld t2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and ra, a1, t2
+; RV64IM-NEXT:    ld t2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, a1, t1
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, a6
+; RV64IM-NEXT:    and a5, a1, a5
+; RV64IM-NEXT:    sd a5, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a4, a1, a4
+; RV64IM-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a1, a3
+; RV64IM-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a2, a1, a2
+; RV64IM-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, a1, 1024
+; RV64IM-NEXT:    srliw a3, a1, 31
+; RV64IM-NEXT:    srli a1, a1, 63
+; RV64IM-NEXT:    mul s9, a0, a2
+; RV64IM-NEXT:    slli a3, a3, 31
+; RV64IM-NEXT:    slli a1, a1, 63
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul a2, a0, s3
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, s8
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a2
+; RV64IM-NEXT:    ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a5, a0, t3
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s10
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s10, a0, s11
+; RV64IM-NEXT:    mul s11, a0, ra
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t3, a0, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    ld a4, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    ld a6, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    ld t1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    ld t6, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, s8
+; RV64IM-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s9, s8, s9
+; RV64IM-NEXT:    xor a5, a5, s7
+; RV64IM-NEXT:    ld s7, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, s7, s2
+; RV64IM-NEXT:    ld s7, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s7, s0
+; RV64IM-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s7, t2
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    xor t4, s1, s4
+; RV64IM-NEXT:    xor s1, s10, s11
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    xor a0, a0, s9
+; RV64IM-NEXT:    ld a2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s2, a5
+; RV64IM-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, s2
+; RV64IM-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, s2
+; RV64IM-NEXT:    xor a7, a7, t5
+; RV64IM-NEXT:    xor t4, t4, s5
+; RV64IM-NEXT:    xor t5, s1, ra
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s0, a3
+; RV64IM-NEXT:    ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t2
+; RV64IM-NEXT:    xor t2, t4, s6
+; RV64IM-NEXT:    xor t0, t5, t0
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a5, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    xor a7, t0, t3
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a7, s3
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i8 @llvm.clmul.i8(i8 %a, i8 %b)
+  ret i8 %res
+}
+
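+; A short descriptive note on the i16 test below (editorial comment, not a CHECK
+; line): the output expands the carry-less multiply into one partial product
+; %a * (%b & (1 << i)) per bit position i of %b, combined with a tree of xors.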
+define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
+; RV32IM-LABEL: clmul_i16:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -48
+; RV32IM-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t6, a1, 2
+; RV32IM-NEXT:    andi s1, a1, 1
+; RV32IM-NEXT:    andi a7, a1, 4
+; RV32IM-NEXT:    andi t2, a1, 8
+; RV32IM-NEXT:    andi t0, a1, 16
+; RV32IM-NEXT:    andi t3, a1, 32
+; RV32IM-NEXT:    andi a2, a1, 64
+; RV32IM-NEXT:    andi t4, a1, 128
+; RV32IM-NEXT:    andi s0, a1, 256
+; RV32IM-NEXT:    andi a3, a1, 512
+; RV32IM-NEXT:    li a4, 1
+; RV32IM-NEXT:    lui a5, 1
+; RV32IM-NEXT:    lui a6, 2
+; RV32IM-NEXT:    lui t1, 4
+; RV32IM-NEXT:    lui t5, 8
+; RV32IM-NEXT:    lui s2, 16
+; RV32IM-NEXT:    lui s3, 32
+; RV32IM-NEXT:    lui s4, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    xor t6, s1, t6
+; RV32IM-NEXT:    lui s1, 8192
+; RV32IM-NEXT:    mul a7, a0, a7
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    xor a7, a7, t2
+; RV32IM-NEXT:    lui t2, 16384
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    xor t0, t0, t3
+; RV32IM-NEXT:    lui t3, 32768
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    xor t4, t4, s0
+; RV32IM-NEXT:    lui s0, 65536
+; RV32IM-NEXT:    xor a7, t6, a7
+; RV32IM-NEXT:    lui t6, 131072
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    xor a2, t0, a2
+; RV32IM-NEXT:    lui t0, 262144
+; RV32IM-NEXT:    mul a3, a0, a3
+; RV32IM-NEXT:    xor a3, t4, a3
+; RV32IM-NEXT:    lui t4, 524288
+; RV32IM-NEXT:    slli a4, a4, 11
+; RV32IM-NEXT:    and a5, a1, a5
+; RV32IM-NEXT:    and a6, a1, a6
+; RV32IM-NEXT:    and t1, a1, t1
+; RV32IM-NEXT:    and t5, a1, t5
+; RV32IM-NEXT:    and s2, a1, s2
+; RV32IM-NEXT:    and s3, a1, s3
+; RV32IM-NEXT:    and s4, a1, s4
+; RV32IM-NEXT:    and s5, a1, s5
+; RV32IM-NEXT:    and s6, a1, s6
+; RV32IM-NEXT:    and s7, a1, s7
+; RV32IM-NEXT:    and s8, a1, s8
+; RV32IM-NEXT:    and s9, a1, s9
+; RV32IM-NEXT:    and s10, a1, s10
+; RV32IM-NEXT:    and s1, a1, s1
+; RV32IM-NEXT:    and t2, a1, t2
+; RV32IM-NEXT:    and t3, a1, t3
+; RV32IM-NEXT:    and s0, a1, s0
+; RV32IM-NEXT:    and t6, a1, t6
+; RV32IM-NEXT:    and t0, a1, t0
+; RV32IM-NEXT:    and t4, a1, t4
+; RV32IM-NEXT:    and a4, a1, a4
+; RV32IM-NEXT:    andi a1, a1, 1024
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    mul a5, a0, a5
+; RV32IM-NEXT:    mul a6, a0, a6
+; RV32IM-NEXT:    mul t1, a0, t1
+; RV32IM-NEXT:    mul t5, a0, t5
+; RV32IM-NEXT:    mul s2, a0, s2
+; RV32IM-NEXT:    mul s3, a0, s3
+; RV32IM-NEXT:    mul s4, a0, s4
+; RV32IM-NEXT:    mul s5, a0, s5
+; RV32IM-NEXT:    mul s6, a0, s6
+; RV32IM-NEXT:    mul s7, a0, s7
+; RV32IM-NEXT:    mul s8, a0, s8
+; RV32IM-NEXT:    mul s9, a0, s9
+; RV32IM-NEXT:    mul s10, a0, s10
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul a0, a0, a4
+; RV32IM-NEXT:    xor a4, t1, t5
+; RV32IM-NEXT:    xor t1, s5, s6
+; RV32IM-NEXT:    xor t2, s1, t2
+; RV32IM-NEXT:    xor a2, a7, a2
+; RV32IM-NEXT:    xor a1, a3, a1
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a3, a4, s2
+; RV32IM-NEXT:    xor a4, t1, s7
+; RV32IM-NEXT:    xor a5, t2, t3
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    xor a2, a3, s3
+; RV32IM-NEXT:    xor a3, a4, s8
+; RV32IM-NEXT:    xor a5, a5, s0
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, a2, s4
+; RV32IM-NEXT:    xor a2, a3, s9
+; RV32IM-NEXT:    xor a3, a5, t6
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, s10
+; RV32IM-NEXT:    xor a2, a3, t0
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, t4
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 48
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmul_i16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi t2, a1, 2
+; RV64IM-NEXT:    andi t4, a1, 1
+; RV64IM-NEXT:    andi a6, a1, 4
+; RV64IM-NEXT:    andi t0, a1, 8
+; RV64IM-NEXT:    andi a5, a1, 16
+; RV64IM-NEXT:    andi a7, a1, 32
+; RV64IM-NEXT:    andi a3, a1, 64
+; RV64IM-NEXT:    andi t1, a1, 128
+; RV64IM-NEXT:    andi t3, a1, 256
+; RV64IM-NEXT:    andi a4, a1, 512
+; RV64IM-NEXT:    li a2, 1
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    lui t6, 2
+; RV64IM-NEXT:    lui s0, 4
+; RV64IM-NEXT:    lui s1, 8
+; RV64IM-NEXT:    lui s2, 16
+; RV64IM-NEXT:    lui s3, 32
+; RV64IM-NEXT:    lui s4, 64
+; RV64IM-NEXT:    lui s5, 128
+; RV64IM-NEXT:    lui s6, 256
+; RV64IM-NEXT:    lui s8, 512
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    lui s10, 2048
+; RV64IM-NEXT:    lui s11, 4096
+; RV64IM-NEXT:    lui ra, 8192
+; RV64IM-NEXT:    lui t5, 16384
+; RV64IM-NEXT:    mul t2, a0, t2
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    xor t2, t4, t2
+; RV64IM-NEXT:    lui t4, 32768
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    xor a6, a6, t0
+; RV64IM-NEXT:    lui t0, 65536
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    mul a7, a0, a7
+; RV64IM-NEXT:    xor a5, a5, a7
+; RV64IM-NEXT:    lui a7, 131072
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    xor t1, t1, t3
+; RV64IM-NEXT:    lui t3, 262144
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a6, t2, a6
+; RV64IM-NEXT:    sd a6, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a6, a2, 11
+; RV64IM-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s7, a1, s7
+; RV64IM-NEXT:    and a6, a1, t6
+; RV64IM-NEXT:    sd a6, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    and s2, a1, s2
+; RV64IM-NEXT:    and s3, a1, s3
+; RV64IM-NEXT:    and a6, a1, s4
+; RV64IM-NEXT:    sd a6, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, s5
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    and s8, a1, s8
+; RV64IM-NEXT:    and t6, a1, s9
+; RV64IM-NEXT:    sd t6, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s10
+; RV64IM-NEXT:    sd t6, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s11
+; RV64IM-NEXT:    sd t6, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    and t5, a1, t5
+; RV64IM-NEXT:    and t4, a1, t4
+; RV64IM-NEXT:    sd t4, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, t3
+; RV64IM-NEXT:    sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    sd a3, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a2, 32
+; RV64IM-NEXT:    xor a3, t1, a4
+; RV64IM-NEXT:    sd a3, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, a2, 33
+; RV64IM-NEXT:    mul a3, a0, s0
+; RV64IM-NEXT:    mul a4, a0, s1
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s0, a2, 34
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a4, a0, t2
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s1, a2, 35
+; RV64IM-NEXT:    mul a3, a0, t6
+; RV64IM-NEXT:    mul a4, a0, t5
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, a2, 36
+; RV64IM-NEXT:    slli t6, a2, 37
+; RV64IM-NEXT:    slli s5, a2, 38
+; RV64IM-NEXT:    slli s6, a2, 39
+; RV64IM-NEXT:    slli s9, a2, 40
+; RV64IM-NEXT:    slli s10, a2, 41
+; RV64IM-NEXT:    slli s11, a2, 42
+; RV64IM-NEXT:    slli ra, a2, 43
+; RV64IM-NEXT:    slli a3, a2, 44
+; RV64IM-NEXT:    sd a3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 45
+; RV64IM-NEXT:    sd a3, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 46
+; RV64IM-NEXT:    sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 47
+; RV64IM-NEXT:    sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 48
+; RV64IM-NEXT:    sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 49
+; RV64IM-NEXT:    sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 50
+; RV64IM-NEXT:    sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 51
+; RV64IM-NEXT:    sd a3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 52
+; RV64IM-NEXT:    sd a3, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 53
+; RV64IM-NEXT:    sd a3, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 54
+; RV64IM-NEXT:    sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, a2, 55
+; RV64IM-NEXT:    slli t0, a2, 56
+; RV64IM-NEXT:    slli a7, a2, 57
+; RV64IM-NEXT:    slli a6, a2, 58
+; RV64IM-NEXT:    slli a5, a2, 59
+; RV64IM-NEXT:    slli a4, a2, 60
+; RV64IM-NEXT:    slli a3, a2, 61
+; RV64IM-NEXT:    slli a2, a2, 62
+; RV64IM-NEXT:    ld t2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t3, a1, t2
+; RV64IM-NEXT:    and t2, a1, t4
+; RV64IM-NEXT:    sd t2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t4, a1, s4
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, t5
+; RV64IM-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, t6
+; RV64IM-NEXT:    and t2, a1, s5
+; RV64IM-NEXT:    sd t2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    sd t2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s4, a1, s9
+; RV64IM-NEXT:    and s5, a1, s10
+; RV64IM-NEXT:    and s6, a1, s11
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, a1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s11, a1, t2
+; RV64IM-NEXT:    ld t2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and ra, a1, t2
+; RV64IM-NEXT:    ld t2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, a1, t1
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, a6
+; RV64IM-NEXT:    and a5, a1, a5
+; RV64IM-NEXT:    sd a5, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a4, a1, a4
+; RV64IM-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a1, a3
+; RV64IM-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a2, a1, a2
+; RV64IM-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, a1, 1024
+; RV64IM-NEXT:    srliw a3, a1, 31
+; RV64IM-NEXT:    srli a1, a1, 63
+; RV64IM-NEXT:    mul s9, a0, a2
+; RV64IM-NEXT:    slli a3, a3, 31
+; RV64IM-NEXT:    slli a1, a1, 63
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul a2, a0, s3
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, s8
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a2
+; RV64IM-NEXT:    ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a5, a0, t3
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s10
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s10, a0, s11
+; RV64IM-NEXT:    mul s11, a0, ra
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t3, a0, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    ld a4, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    ld a6, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    ld t1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    ld t6, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, s8
+; RV64IM-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s9, s8, s9
+; RV64IM-NEXT:    xor a5, a5, s7
+; RV64IM-NEXT:    ld s7, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, s7, s2
+; RV64IM-NEXT:    ld s7, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s7, s0
+; RV64IM-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s7, t2
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    xor t4, s1, s4
+; RV64IM-NEXT:    xor s1, s10, s11
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    xor a0, a0, s9
+; RV64IM-NEXT:    ld a2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s2, a5
+; RV64IM-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, s2
+; RV64IM-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, s2
+; RV64IM-NEXT:    xor a7, a7, t5
+; RV64IM-NEXT:    xor t4, t4, s5
+; RV64IM-NEXT:    xor t5, s1, ra
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s0, a3
+; RV64IM-NEXT:    ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t2
+; RV64IM-NEXT:    xor t2, t4, s6
+; RV64IM-NEXT:    xor t0, t5, t0
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a5, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    xor a7, t0, t3
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a7, s3
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i16 @llvm.clmul.i16(i16 %a, i16 %b)
+  ret i16 %res
+}
+
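+; A short descriptive note on the i32 test below (editorial comment, not a CHECK
+; line): the same per-bit mul/xor expansion is used; on RV64, where i32 is not a
+; legal type, the operands are promoted to i64 and all 64 bit positions appear.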
+define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
+; RV32IM-LABEL: clmul_i32:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -48
+; RV32IM-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t6, a1, 2
+; RV32IM-NEXT:    andi s1, a1, 1
+; RV32IM-NEXT:    andi a7, a1, 4
+; RV32IM-NEXT:    andi t2, a1, 8
+; RV32IM-NEXT:    andi t0, a1, 16
+; RV32IM-NEXT:    andi t3, a1, 32
+; RV32IM-NEXT:    andi a2, a1, 64
+; RV32IM-NEXT:    andi t4, a1, 128
+; RV32IM-NEXT:    andi s0, a1, 256
+; RV32IM-NEXT:    andi a3, a1, 512
+; RV32IM-NEXT:    li a4, 1
+; RV32IM-NEXT:    lui a5, 1
+; RV32IM-NEXT:    lui a6, 2
+; RV32IM-NEXT:    lui t1, 4
+; RV32IM-NEXT:    lui t5, 8
+; RV32IM-NEXT:    lui s2, 16
+; RV32IM-NEXT:    lui s3, 32
+; RV32IM-NEXT:    lui s4, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    xor t6, s1, t6
+; RV32IM-NEXT:    lui s1, 8192
+; RV32IM-NEXT:    mul a7, a0, a7
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    xor a7, a7, t2
+; RV32IM-NEXT:    lui t2, 16384
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    xor t0, t0, t3
+; RV32IM-NEXT:    lui t3, 32768
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    xor t4, t4, s0
+; RV32IM-NEXT:    lui s0, 65536
+; RV32IM-NEXT:    xor a7, t6, a7
+; RV32IM-NEXT:    lui t6, 131072
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    xor a2, t0, a2
+; RV32IM-NEXT:    lui t0, 262144
+; RV32IM-NEXT:    mul a3, a0, a3
+; RV32IM-NEXT:    xor a3, t4, a3
+; RV32IM-NEXT:    lui t4, 524288
+; RV32IM-NEXT:    slli a4, a4, 11
+; RV32IM-NEXT:    and a5, a1, a5
+; RV32IM-NEXT:    and a6, a1, a6
+; RV32IM-NEXT:    and t1, a1, t1
+; RV32IM-NEXT:    and t5, a1, t5
+; RV32IM-NEXT:    and s2, a1, s2
+; RV32IM-NEXT:    and s3, a1, s3
+; RV32IM-NEXT:    and s4, a1, s4
+; RV32IM-NEXT:    and s5, a1, s5
+; RV32IM-NEXT:    and s6, a1, s6
+; RV32IM-NEXT:    and s7, a1, s7
+; RV32IM-NEXT:    and s8, a1, s8
+; RV32IM-NEXT:    and s9, a1, s9
+; RV32IM-NEXT:    and s10, a1, s10
+; RV32IM-NEXT:    and s1, a1, s1
+; RV32IM-NEXT:    and t2, a1, t2
+; RV32IM-NEXT:    and t3, a1, t3
+; RV32IM-NEXT:    and s0, a1, s0
+; RV32IM-NEXT:    and t6, a1, t6
+; RV32IM-NEXT:    and t0, a1, t0
+; RV32IM-NEXT:    and t4, a1, t4
+; RV32IM-NEXT:    and a4, a1, a4
+; RV32IM-NEXT:    andi a1, a1, 1024
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    mul a5, a0, a5
+; RV32IM-NEXT:    mul a6, a0, a6
+; RV32IM-NEXT:    mul t1, a0, t1
+; RV32IM-NEXT:    mul t5, a0, t5
+; RV32IM-NEXT:    mul s2, a0, s2
+; RV32IM-NEXT:    mul s3, a0, s3
+; RV32IM-NEXT:    mul s4, a0, s4
+; RV32IM-NEXT:    mul s5, a0, s5
+; RV32IM-NEXT:    mul s6, a0, s6
+; RV32IM-NEXT:    mul s7, a0, s7
+; RV32IM-NEXT:    mul s8, a0, s8
+; RV32IM-NEXT:    mul s9, a0, s9
+; RV32IM-NEXT:    mul s10, a0, s10
+; RV32IM-NEXT:    mul s1, a0, s1
+; RV32IM-NEXT:    mul t2, a0, t2
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    mul s0, a0, s0
+; RV32IM-NEXT:    mul t6, a0, t6
+; RV32IM-NEXT:    mul t0, a0, t0
+; RV32IM-NEXT:    mul t4, a0, t4
+; RV32IM-NEXT:    mul a0, a0, a4
+; RV32IM-NEXT:    xor a4, t1, t5
+; RV32IM-NEXT:    xor t1, s5, s6
+; RV32IM-NEXT:    xor t2, s1, t2
+; RV32IM-NEXT:    xor a2, a7, a2
+; RV32IM-NEXT:    xor a1, a3, a1
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a3, a4, s2
+; RV32IM-NEXT:    xor a4, t1, s7
+; RV32IM-NEXT:    xor a5, t2, t3
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    xor a2, a3, s3
+; RV32IM-NEXT:    xor a3, a4, s8
+; RV32IM-NEXT:    xor a5, a5, s0
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, a2, s4
+; RV32IM-NEXT:    xor a2, a3, s9
+; RV32IM-NEXT:    xor a3, a5, t6
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, s10
+; RV32IM-NEXT:    xor a2, a3, t0
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a1, a2, t4
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 48
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmul_i32:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi t2, a1, 2
+; RV64IM-NEXT:    andi t4, a1, 1
+; RV64IM-NEXT:    andi a6, a1, 4
+; RV64IM-NEXT:    andi t0, a1, 8
+; RV64IM-NEXT:    andi a5, a1, 16
+; RV64IM-NEXT:    andi a7, a1, 32
+; RV64IM-NEXT:    andi a3, a1, 64
+; RV64IM-NEXT:    andi t1, a1, 128
+; RV64IM-NEXT:    andi t3, a1, 256
+; RV64IM-NEXT:    andi a4, a1, 512
+; RV64IM-NEXT:    li a2, 1
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    lui t6, 2
+; RV64IM-NEXT:    lui s0, 4
+; RV64IM-NEXT:    lui s1, 8
+; RV64IM-NEXT:    lui s2, 16
+; RV64IM-NEXT:    lui s3, 32
+; RV64IM-NEXT:    lui s4, 64
+; RV64IM-NEXT:    lui s5, 128
+; RV64IM-NEXT:    lui s6, 256
+; RV64IM-NEXT:    lui s8, 512
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    lui s10, 2048
+; RV64IM-NEXT:    lui s11, 4096
+; RV64IM-NEXT:    lui ra, 8192
+; RV64IM-NEXT:    lui t5, 16384
+; RV64IM-NEXT:    mul t2, a0, t2
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    xor t2, t4, t2
+; RV64IM-NEXT:    lui t4, 32768
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    xor a6, a6, t0
+; RV64IM-NEXT:    lui t0, 65536
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    mul a7, a0, a7
+; RV64IM-NEXT:    xor a5, a5, a7
+; RV64IM-NEXT:    lui a7, 131072
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    xor t1, t1, t3
+; RV64IM-NEXT:    lui t3, 262144
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a6, t2, a6
+; RV64IM-NEXT:    sd a6, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a6, a2, 11
+; RV64IM-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s7, a1, s7
+; RV64IM-NEXT:    and a6, a1, t6
+; RV64IM-NEXT:    sd a6, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    and s2, a1, s2
+; RV64IM-NEXT:    and s3, a1, s3
+; RV64IM-NEXT:    and a6, a1, s4
+; RV64IM-NEXT:    sd a6, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, s5
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    and s8, a1, s8
+; RV64IM-NEXT:    and t6, a1, s9
+; RV64IM-NEXT:    sd t6, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s10
+; RV64IM-NEXT:    sd t6, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s11
+; RV64IM-NEXT:    sd t6, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    and t5, a1, t5
+; RV64IM-NEXT:    and t4, a1, t4
+; RV64IM-NEXT:    sd t4, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, t3
+; RV64IM-NEXT:    sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    sd a3, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a2, 32
+; RV64IM-NEXT:    xor a3, t1, a4
+; RV64IM-NEXT:    sd a3, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, a2, 33
+; RV64IM-NEXT:    mul a3, a0, s0
+; RV64IM-NEXT:    mul a4, a0, s1
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s0, a2, 34
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a4, a0, t2
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s1, a2, 35
+; RV64IM-NEXT:    mul a3, a0, t6
+; RV64IM-NEXT:    mul a4, a0, t5
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, a2, 36
+; RV64IM-NEXT:    slli t6, a2, 37
+; RV64IM-NEXT:    slli s5, a2, 38
+; RV64IM-NEXT:    slli s6, a2, 39
+; RV64IM-NEXT:    slli s9, a2, 40
+; RV64IM-NEXT:    slli s10, a2, 41
+; RV64IM-NEXT:    slli s11, a2, 42
+; RV64IM-NEXT:    slli ra, a2, 43
+; RV64IM-NEXT:    slli a3, a2, 44
+; RV64IM-NEXT:    sd a3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 45
+; RV64IM-NEXT:    sd a3, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 46
+; RV64IM-NEXT:    sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 47
+; RV64IM-NEXT:    sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 48
+; RV64IM-NEXT:    sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 49
+; RV64IM-NEXT:    sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 50
+; RV64IM-NEXT:    sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 51
+; RV64IM-NEXT:    sd a3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 52
+; RV64IM-NEXT:    sd a3, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 53
+; RV64IM-NEXT:    sd a3, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 54
+; RV64IM-NEXT:    sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, a2, 55
+; RV64IM-NEXT:    slli t0, a2, 56
+; RV64IM-NEXT:    slli a7, a2, 57
+; RV64IM-NEXT:    slli a6, a2, 58
+; RV64IM-NEXT:    slli a5, a2, 59
+; RV64IM-NEXT:    slli a4, a2, 60
+; RV64IM-NEXT:    slli a3, a2, 61
+; RV64IM-NEXT:    slli a2, a2, 62
+; RV64IM-NEXT:    ld t2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t3, a1, t2
+; RV64IM-NEXT:    and t2, a1, t4
+; RV64IM-NEXT:    sd t2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t4, a1, s4
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, t5
+; RV64IM-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, t6
+; RV64IM-NEXT:    and t2, a1, s5
+; RV64IM-NEXT:    sd t2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    sd t2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s4, a1, s9
+; RV64IM-NEXT:    and s5, a1, s10
+; RV64IM-NEXT:    and s6, a1, s11
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, a1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s11, a1, t2
+; RV64IM-NEXT:    ld t2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and ra, a1, t2
+; RV64IM-NEXT:    ld t2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, a1, t1
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, a6
+; RV64IM-NEXT:    and a5, a1, a5
+; RV64IM-NEXT:    sd a5, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a4, a1, a4
+; RV64IM-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a1, a3
+; RV64IM-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a2, a1, a2
+; RV64IM-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, a1, 1024
+; RV64IM-NEXT:    srliw a3, a1, 31
+; RV64IM-NEXT:    srli a1, a1, 63
+; RV64IM-NEXT:    mul s9, a0, a2
+; RV64IM-NEXT:    slli a3, a3, 31
+; RV64IM-NEXT:    slli a1, a1, 63
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul a2, a0, s3
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, s8
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a2
+; RV64IM-NEXT:    ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a5, a0, t3
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s10
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s10, a0, s11
+; RV64IM-NEXT:    mul s11, a0, ra
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t3, a0, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    ld a4, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    ld a6, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    ld t1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    ld t6, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, s8
+; RV64IM-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s9, s8, s9
+; RV64IM-NEXT:    xor a5, a5, s7
+; RV64IM-NEXT:    ld s7, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, s7, s2
+; RV64IM-NEXT:    ld s7, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s7, s0
+; RV64IM-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s7, t2
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    xor t4, s1, s4
+; RV64IM-NEXT:    xor s1, s10, s11
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    xor a0, a0, s9
+; RV64IM-NEXT:    ld a2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s2, a5
+; RV64IM-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, s2
+; RV64IM-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, s2
+; RV64IM-NEXT:    xor a7, a7, t5
+; RV64IM-NEXT:    xor t4, t4, s5
+; RV64IM-NEXT:    xor t5, s1, ra
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s0, a3
+; RV64IM-NEXT:    ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t2
+; RV64IM-NEXT:    xor t2, t4, s6
+; RV64IM-NEXT:    xor t0, t5, t0
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a5, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    xor a7, t0, t3
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a7, s3
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i32 @llvm.clmul.i32(i32 %a, i32 %b)
+  ret i32 %res
+}
+
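+; A short descriptive note on the i64 test below (editorial comment, not a CHECK
+; line): on RV32 each 64-bit partial product spans a register pair and is built
+; with mul/mulhu plus or before the xor reduction.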
+define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
+; RV32IM-LABEL: clmul_i64:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -368
+; RV32IM-NEXT:    sw ra, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 332(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 316(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mv t0, a1
+; RV32IM-NEXT:    andi t3, a2, 4
+; RV32IM-NEXT:    andi t5, a2, 2
+; RV32IM-NEXT:    andi a4, a2, 1
+; RV32IM-NEXT:    andi t6, a2, 8
+; RV32IM-NEXT:    andi s0, a2, 16
+; RV32IM-NEXT:    andi s1, a2, 32
+; RV32IM-NEXT:    andi s9, a2, 64
+; RV32IM-NEXT:    andi t1, a2, 128
+; RV32IM-NEXT:    andi s2, a2, 256
+; RV32IM-NEXT:    andi ra, a2, 512
+; RV32IM-NEXT:    andi s11, a2, 1024
+; RV32IM-NEXT:    andi s4, a3, 1
+; RV32IM-NEXT:    mul a5, a1, t3
+; RV32IM-NEXT:    mulhu a6, a0, t3
+; RV32IM-NEXT:    mul a1, a1, t5
+; RV32IM-NEXT:    mulhu a7, a0, t5
+; RV32IM-NEXT:    mul t2, t0, t6
+; RV32IM-NEXT:    mulhu t4, a0, t6
+; RV32IM-NEXT:    mul s3, t0, s0
+; RV32IM-NEXT:    mulhu s5, a0, s0
+; RV32IM-NEXT:    mul s6, t0, s1
+; RV32IM-NEXT:    mulhu s7, a0, s1
+; RV32IM-NEXT:    sw s9, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s8, t0, s9
+; RV32IM-NEXT:    or a5, a6, a5
+; RV32IM-NEXT:    mulhu s9, a0, s9
+; RV32IM-NEXT:    or a6, a7, a1
+; RV32IM-NEXT:    mul s10, t0, t1
+; RV32IM-NEXT:    or a1, t4, t2
+; RV32IM-NEXT:    mulhu t4, a0, t1
+; RV32IM-NEXT:    or a7, s5, s3
+; RV32IM-NEXT:    mul s3, t0, s2
+; RV32IM-NEXT:    or t2, s7, s6
+; RV32IM-NEXT:    mulhu s5, a0, s2
+; RV32IM-NEXT:    or s6, s9, s8
+; RV32IM-NEXT:    sw s6, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw ra, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s6, t0, ra
+; RV32IM-NEXT:    or t4, t4, s10
+; RV32IM-NEXT:    mulhu s7, a0, ra
+; RV32IM-NEXT:    or s3, s5, s3
+; RV32IM-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s5, t0, s11
+; RV32IM-NEXT:    or s6, s7, s6
+; RV32IM-NEXT:    sw s6, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu s6, a0, s11
+; RV32IM-NEXT:    or s5, s6, s5
+; RV32IM-NEXT:    sw s5, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi s5, a3, 2
+; RV32IM-NEXT:    mul s4, a0, s4
+; RV32IM-NEXT:    mul s5, a0, s5
+; RV32IM-NEXT:    mul t5, a0, t5
+; RV32IM-NEXT:    xor s9, s4, s5
+; RV32IM-NEXT:    mul s4, a0, a4
+; RV32IM-NEXT:    xor s6, s4, t5
+; RV32IM-NEXT:    lui s4, 2
+; RV32IM-NEXT:    mul t3, a0, t3
+; RV32IM-NEXT:    mul t5, a0, t6
+; RV32IM-NEXT:    xor s7, t3, t5
+; RV32IM-NEXT:    lui s10, 4
+; RV32IM-NEXT:    mul a4, t0, a4
+; RV32IM-NEXT:    mul t3, a0, s0
+; RV32IM-NEXT:    mul t5, a0, s1
+; RV32IM-NEXT:    xor s0, t3, t5
+; RV32IM-NEXT:    lui s5, 1
+; RV32IM-NEXT:    and t6, a2, s5
+; RV32IM-NEXT:    mul t1, a0, t1
+; RV32IM-NEXT:    mul t3, a0, s2
+; RV32IM-NEXT:    xor s1, t1, t3
+; RV32IM-NEXT:    and t3, a2, s4
+; RV32IM-NEXT:    xor a4, a4, a6
+; RV32IM-NEXT:    sw a4, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t1, a2, s10
+; RV32IM-NEXT:    xor a1, a5, a1
+; RV32IM-NEXT:    sw a1, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw t6, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, t6
+; RV32IM-NEXT:    xor a1, a7, t2
+; RV32IM-NEXT:    sw a1, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a5, a0, t6
+; RV32IM-NEXT:    xor a1, t4, s3
+; RV32IM-NEXT:    sw a1, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw t3, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a6, t0, t3
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a4, a0, t3
+; RV32IM-NEXT:    or a1, a4, a6
+; RV32IM-NEXT:    sw a1, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw t1, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, t1
+; RV32IM-NEXT:    mulhu a5, a0, t1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 8
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    sw a1, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 16
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    lui s8, 16
+; RV32IM-NEXT:    sw a1, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 32
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    sw a1, 176(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui t2, 64
+; RV32IM-NEXT:    and a1, a2, t2
+; RV32IM-NEXT:    sw a1, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui s3, 128
+; RV32IM-NEXT:    and a1, a2, s3
+; RV32IM-NEXT:    sw a1, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 232(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 256
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    lui t5, 256
+; RV32IM-NEXT:    sw a1, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui t3, 512
+; RV32IM-NEXT:    and a1, a2, t3
+; RV32IM-NEXT:    sw a1, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 236(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui t4, 1024
+; RV32IM-NEXT:    and a1, a2, t4
+; RV32IM-NEXT:    sw a1, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 240(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui t6, 2048
+; RV32IM-NEXT:    and a1, a2, t6
+; RV32IM-NEXT:    sw a1, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui s2, 4096
+; RV32IM-NEXT:    and a1, a2, s2
+; RV32IM-NEXT:    sw a1, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 8192
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    sw a1, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 16384
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    lui s4, 16384
+; RV32IM-NEXT:    sw a1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 32768
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    lui t1, 32768
+; RV32IM-NEXT:    sw a1, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 65536
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    lui a7, 65536
+; RV32IM-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 131072
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    lui a6, 131072
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    or a4, a5, a4
+; RV32IM-NEXT:    sw a4, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a5, 262144
+; RV32IM-NEXT:    and a1, a2, a5
+; RV32IM-NEXT:    sw a1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, t0, a1
+; RV32IM-NEXT:    mulhu ra, a0, a1
+; RV32IM-NEXT:    or a1, ra, a4
+; RV32IM-NEXT:    sw a1, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui ra, 524288
+; RV32IM-NEXT:    and s10, a2, ra
+; RV32IM-NEXT:    mul a1, t0, s10
+; RV32IM-NEXT:    mulhu s11, a0, s10
+; RV32IM-NEXT:    or a1, s11, a1
+; RV32IM-NEXT:    sw a1, 224(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a3, 4
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    xor a1, s9, a1
+; RV32IM-NEXT:    sw a1, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a3, s3
+; RV32IM-NEXT:    and s11, a3, t5
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    mul s11, a0, s11
+; RV32IM-NEXT:    xor a1, a1, s11
+; RV32IM-NEXT:    sw a1, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a3, a5
+; RV32IM-NEXT:    and a1, a3, ra
+; RV32IM-NEXT:    mul a5, a0, a5
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    xor a1, a5, a1
+; RV32IM-NEXT:    sw a1, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    xor a1, s6, s7
+; RV32IM-NEXT:    sw a1, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    xor a1, s0, a1
+; RV32IM-NEXT:    sw a1, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    xor a1, s1, a1
+; RV32IM-NEXT:    sw a1, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    li a1, 1
+; RV32IM-NEXT:    slli a1, a1, 11
+; RV32IM-NEXT:    and s6, a3, s5
+; RV32IM-NEXT:    lui a4, 2
+; RV32IM-NEXT:    and s11, a3, a4
+; RV32IM-NEXT:    lui a4, 4
+; RV32IM-NEXT:    and s5, a3, a4
+; RV32IM-NEXT:    lui a4, 8
+; RV32IM-NEXT:    and s7, a3, a4
+; RV32IM-NEXT:    and s8, a3, s8
+; RV32IM-NEXT:    lui a4, 32
+; RV32IM-NEXT:    and s9, a3, a4
+; RV32IM-NEXT:    and t2, a3, t2
+; RV32IM-NEXT:    and t3, a3, t3
+; RV32IM-NEXT:    and t4, a3, t4
+; RV32IM-NEXT:    and t5, a3, t6
+; RV32IM-NEXT:    and t6, a3, s2
+; RV32IM-NEXT:    lui s0, 8192
+; RV32IM-NEXT:    and s0, a3, s0
+; RV32IM-NEXT:    and s1, a3, s4
+; RV32IM-NEXT:    and s2, a3, t1
+; RV32IM-NEXT:    and s3, a3, a7
+; RV32IM-NEXT:    and s4, a3, a6
+; RV32IM-NEXT:    and t1, a2, a1
+; RV32IM-NEXT:    and a1, a3, a1
+; RV32IM-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a3, 8
+; RV32IM-NEXT:    andi ra, a3, 16
+; RV32IM-NEXT:    andi a2, a3, 32
+; RV32IM-NEXT:    andi a4, a3, 64
+; RV32IM-NEXT:    andi a5, a3, 128
+; RV32IM-NEXT:    andi a6, a3, 256
+; RV32IM-NEXT:    andi a7, a3, 512
+; RV32IM-NEXT:    andi a3, a3, 1024
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, ra
+; RV32IM-NEXT:    sw a1, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a2
+; RV32IM-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a4
+; RV32IM-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a5
+; RV32IM-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a6
+; RV32IM-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a7
+; RV32IM-NEXT:    sw a1, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul ra, a0, a3
+; RV32IM-NEXT:    lw a1, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s6
+; RV32IM-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s11
+; RV32IM-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s5
+; RV32IM-NEXT:    sw a1, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s7
+; RV32IM-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s8
+; RV32IM-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s9
+; RV32IM-NEXT:    sw a1, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, t2
+; RV32IM-NEXT:    sw a1, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a3, a0, t3
+; RV32IM-NEXT:    mul a1, a0, t4
+; RV32IM-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, t5
+; RV32IM-NEXT:    sw a1, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, t6
+; RV32IM-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s0
+; RV32IM-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s1
+; RV32IM-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s2
+; RV32IM-NEXT:    sw a1, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s3
+; RV32IM-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s4
+; RV32IM-NEXT:    sw a1, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, a0, a1
+; RV32IM-NEXT:    lw a1, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a0, a1
+; RV32IM-NEXT:    lw a1, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a0, a1
+; RV32IM-NEXT:    lw a1, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t5, a0, a1
+; RV32IM-NEXT:    lw a1, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t2, a0, a1
+; RV32IM-NEXT:    lw a1, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 164(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a0, a1
+; RV32IM-NEXT:    lw a1, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a0, a1
+; RV32IM-NEXT:    lw a1, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s1, a0, a1
+; RV32IM-NEXT:    lw a1, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a0, a1
+; RV32IM-NEXT:    lw a1, 148(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a0, a1
+; RV32IM-NEXT:    lw a1, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a0, a1
+; RV32IM-NEXT:    lw a1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a0, a1
+; RV32IM-NEXT:    lw a1, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s5, a0, a1
+; RV32IM-NEXT:    lw a1, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s6, a0, a1
+; RV32IM-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a0, a1
+; RV32IM-NEXT:    lw a1, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a0, a1
+; RV32IM-NEXT:    mul a6, a0, s10
+; RV32IM-NEXT:    mul t0, t0, t1
+; RV32IM-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a0, a1
+; RV32IM-NEXT:    mulhu s9, a0, t1
+; RV32IM-NEXT:    mul a4, a0, t1
+; RV32IM-NEXT:    xor t1, t3, t4
+; RV32IM-NEXT:    xor t3, t6, s0
+; RV32IM-NEXT:    xor t4, s3, s4
+; RV32IM-NEXT:    lw a0, 276(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t6, a0, a1
+; RV32IM-NEXT:    lw a0, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s0, s0, a0
+; RV32IM-NEXT:    lw a0, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s3, a1, a0
+; RV32IM-NEXT:    or t0, s9, t0
+; RV32IM-NEXT:    lw a0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s4, a0, a1
+; RV32IM-NEXT:    lw a0, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 220(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s9, a0, a1
+; RV32IM-NEXT:    lw a0, 180(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s10, a0, a1
+; RV32IM-NEXT:    lw a0, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a2, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a0, a2
+; RV32IM-NEXT:    xor s8, ra, s8
+; RV32IM-NEXT:    lw a0, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a0, a3
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a0, a1
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, ra
+; RV32IM-NEXT:    xor a4, a4, s11
+; RV32IM-NEXT:    xor t1, t1, t5
+; RV32IM-NEXT:    xor t3, t3, s1
+; RV32IM-NEXT:    xor t4, t4, s5
+; RV32IM-NEXT:    xor t5, t6, s0
+; RV32IM-NEXT:    lw t6, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t6, s3, t6
+; RV32IM-NEXT:    lw s0, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t0, t0, s0
+; RV32IM-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s0, s4, s0
+; RV32IM-NEXT:    lw s1, 236(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s1, s9, s1
+; RV32IM-NEXT:    lw s3, 192(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s3, s10, s3
+; RV32IM-NEXT:    lw s4, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, s4
+; RV32IM-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s4, s8, s4
+; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, s5
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    lw a1, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a4, a1
+; RV32IM-NEXT:    xor a4, t1, t2
+; RV32IM-NEXT:    xor t1, t3, s2
+; RV32IM-NEXT:    xor t2, t4, s6
+; RV32IM-NEXT:    xor t3, t5, t6
+; RV32IM-NEXT:    lw t4, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t0, t0, t4
+; RV32IM-NEXT:    lw t4, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t4, s0, t4
+; RV32IM-NEXT:    lw t5, 240(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t5, s1, t5
+; RV32IM-NEXT:    lw t6, 208(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t6, s3, t6
+; RV32IM-NEXT:    lw s0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, s0
+; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s0, s4, s0
+; RV32IM-NEXT:    lw s1, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, s1
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw a1, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a4, a1
+; RV32IM-NEXT:    xor a4, t1, a7
+; RV32IM-NEXT:    xor a7, t2, s7
+; RV32IM-NEXT:    xor t0, t3, t0
+; RV32IM-NEXT:    lw t1, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t4, t1
+; RV32IM-NEXT:    lw t2, 244(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t5, t2
+; RV32IM-NEXT:    lw t3, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t3, t6, t3
+; RV32IM-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    lw t4, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t4, s0, t4
+; RV32IM-NEXT:    lw t5, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, t5
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw a1, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a4, a1
+; RV32IM-NEXT:    xor a4, a7, a5
+; RV32IM-NEXT:    xor a5, t0, t1
+; RV32IM-NEXT:    lw a7, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a7, t2, a7
+; RV32IM-NEXT:    lw t0, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t0, t3, t0
+; RV32IM-NEXT:    lw t1, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, t1
+; RV32IM-NEXT:    lw t1, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t4, t1
+; RV32IM-NEXT:    lw t2, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, t2
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    xor a4, a4, a6
+; RV32IM-NEXT:    xor a1, a5, a7
+; RV32IM-NEXT:    lw a5, 224(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, t0, a5
+; RV32IM-NEXT:    lw a6, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    lw a6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a6, t1, a6
+; RV32IM-NEXT:    lw a7, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a7
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    lw a5, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    lw a5, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a6, a5
+; RV32IM-NEXT:    lw a6, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a6
+; RV32IM-NEXT:    xor a1, a1, a2
+; RV32IM-NEXT:    lw a2, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a5, a2
+; RV32IM-NEXT:    lw a5, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    xor a1, a1, a2
+; RV32IM-NEXT:    lw a2, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a1, a1, a2
+; RV32IM-NEXT:    lw a2, 228(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a2
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    lw ra, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 328(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 368
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmul_i64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi t2, a1, 2
+; RV64IM-NEXT:    andi t4, a1, 1
+; RV64IM-NEXT:    andi a6, a1, 4
+; RV64IM-NEXT:    andi t0, a1, 8
+; RV64IM-NEXT:    andi a5, a1, 16
+; RV64IM-NEXT:    andi a7, a1, 32
+; RV64IM-NEXT:    andi a3, a1, 64
+; RV64IM-NEXT:    andi t1, a1, 128
+; RV64IM-NEXT:    andi t3, a1, 256
+; RV64IM-NEXT:    andi a4, a1, 512
+; RV64IM-NEXT:    li a2, 1
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    lui t6, 2
+; RV64IM-NEXT:    lui s0, 4
+; RV64IM-NEXT:    lui s1, 8
+; RV64IM-NEXT:    lui s2, 16
+; RV64IM-NEXT:    lui s3, 32
+; RV64IM-NEXT:    lui s4, 64
+; RV64IM-NEXT:    lui s5, 128
+; RV64IM-NEXT:    lui s6, 256
+; RV64IM-NEXT:    lui s8, 512
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    lui s10, 2048
+; RV64IM-NEXT:    lui s11, 4096
+; RV64IM-NEXT:    lui ra, 8192
+; RV64IM-NEXT:    lui t5, 16384
+; RV64IM-NEXT:    mul t2, a0, t2
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    xor t2, t4, t2
+; RV64IM-NEXT:    lui t4, 32768
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    xor a6, a6, t0
+; RV64IM-NEXT:    lui t0, 65536
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    mul a7, a0, a7
+; RV64IM-NEXT:    xor a5, a5, a7
+; RV64IM-NEXT:    lui a7, 131072
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    xor t1, t1, t3
+; RV64IM-NEXT:    lui t3, 262144
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a6, t2, a6
+; RV64IM-NEXT:    sd a6, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a6, a2, 11
+; RV64IM-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s7, a1, s7
+; RV64IM-NEXT:    and a6, a1, t6
+; RV64IM-NEXT:    sd a6, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    and s2, a1, s2
+; RV64IM-NEXT:    and s3, a1, s3
+; RV64IM-NEXT:    and a6, a1, s4
+; RV64IM-NEXT:    sd a6, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, s5
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    and s8, a1, s8
+; RV64IM-NEXT:    and t6, a1, s9
+; RV64IM-NEXT:    sd t6, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s10
+; RV64IM-NEXT:    sd t6, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, s11
+; RV64IM-NEXT:    sd t6, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    and t5, a1, t5
+; RV64IM-NEXT:    and t4, a1, t4
+; RV64IM-NEXT:    sd t4, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, t3
+; RV64IM-NEXT:    sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    sd a3, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a2, 32
+; RV64IM-NEXT:    xor a3, t1, a4
+; RV64IM-NEXT:    sd a3, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, a2, 33
+; RV64IM-NEXT:    mul a3, a0, s0
+; RV64IM-NEXT:    mul a4, a0, s1
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s0, a2, 34
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a4, a0, t2
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s1, a2, 35
+; RV64IM-NEXT:    mul a3, a0, t6
+; RV64IM-NEXT:    mul a4, a0, t5
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, a2, 36
+; RV64IM-NEXT:    slli t6, a2, 37
+; RV64IM-NEXT:    slli s5, a2, 38
+; RV64IM-NEXT:    slli s6, a2, 39
+; RV64IM-NEXT:    slli s9, a2, 40
+; RV64IM-NEXT:    slli s10, a2, 41
+; RV64IM-NEXT:    slli s11, a2, 42
+; RV64IM-NEXT:    slli ra, a2, 43
+; RV64IM-NEXT:    slli a3, a2, 44
+; RV64IM-NEXT:    sd a3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 45
+; RV64IM-NEXT:    sd a3, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 46
+; RV64IM-NEXT:    sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 47
+; RV64IM-NEXT:    sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 48
+; RV64IM-NEXT:    sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 49
+; RV64IM-NEXT:    sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 50
+; RV64IM-NEXT:    sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 51
+; RV64IM-NEXT:    sd a3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 52
+; RV64IM-NEXT:    sd a3, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 53
+; RV64IM-NEXT:    sd a3, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a2, 54
+; RV64IM-NEXT:    sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, a2, 55
+; RV64IM-NEXT:    slli t0, a2, 56
+; RV64IM-NEXT:    slli a7, a2, 57
+; RV64IM-NEXT:    slli a6, a2, 58
+; RV64IM-NEXT:    slli a5, a2, 59
+; RV64IM-NEXT:    slli a4, a2, 60
+; RV64IM-NEXT:    slli a3, a2, 61
+; RV64IM-NEXT:    slli a2, a2, 62
+; RV64IM-NEXT:    ld t2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t3, a1, t2
+; RV64IM-NEXT:    and t2, a1, t4
+; RV64IM-NEXT:    sd t2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t4, a1, s4
+; RV64IM-NEXT:    and s0, a1, s0
+; RV64IM-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, s1
+; RV64IM-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, t5
+; RV64IM-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s1, a1, t6
+; RV64IM-NEXT:    and t2, a1, s5
+; RV64IM-NEXT:    sd t2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t2, a1, s6
+; RV64IM-NEXT:    sd t2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s4, a1, s9
+; RV64IM-NEXT:    and s5, a1, s10
+; RV64IM-NEXT:    and s6, a1, s11
+; RV64IM-NEXT:    and t6, a1, ra
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, a1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s11, a1, t2
+; RV64IM-NEXT:    ld t2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and ra, a1, t2
+; RV64IM-NEXT:    ld t2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t2, a1, t2
+; RV64IM-NEXT:    sd t2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, a1, t1
+; RV64IM-NEXT:    and t0, a1, t0
+; RV64IM-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a7, a1, a7
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a6, a1, a6
+; RV64IM-NEXT:    and a5, a1, a5
+; RV64IM-NEXT:    sd a5, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a4, a1, a4
+; RV64IM-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a1, a3
+; RV64IM-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a2, a1, a2
+; RV64IM-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, a1, 1024
+; RV64IM-NEXT:    srliw a3, a1, 31
+; RV64IM-NEXT:    srli a1, a1, 63
+; RV64IM-NEXT:    mul s9, a0, a2
+; RV64IM-NEXT:    slli a3, a3, 31
+; RV64IM-NEXT:    slli a1, a1, 63
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul a2, a0, s3
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, s8
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a2
+; RV64IM-NEXT:    ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a5, a0, t3
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    mul t4, a0, t4
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, s10
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s10, a0, s11
+; RV64IM-NEXT:    mul s11, a0, ra
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t3, a0, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    ld a4, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    ld a6, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a6
+; RV64IM-NEXT:    ld t1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, t1
+; RV64IM-NEXT:    ld t6, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, s8
+; RV64IM-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s9, s8, s9
+; RV64IM-NEXT:    xor a5, a5, s7
+; RV64IM-NEXT:    ld s7, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, s7, s2
+; RV64IM-NEXT:    ld s7, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s7, s0
+; RV64IM-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s7, t2
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    xor t4, s1, s4
+; RV64IM-NEXT:    xor s1, s10, s11
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    xor a0, a0, s9
+; RV64IM-NEXT:    ld a2, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s2, a5
+; RV64IM-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, s2
+; RV64IM-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, s2
+; RV64IM-NEXT:    xor a7, a7, t5
+; RV64IM-NEXT:    xor t4, t4, s5
+; RV64IM-NEXT:    xor t5, s1, ra
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a5, a2
+; RV64IM-NEXT:    ld a3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s0, a3
+; RV64IM-NEXT:    ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t2
+; RV64IM-NEXT:    xor t2, t4, s6
+; RV64IM-NEXT:    xor t0, t5, t0
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a5, a3
+; RV64IM-NEXT:    ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a5, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, t2, a5
+; RV64IM-NEXT:    xor a7, t0, t3
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a7, s3
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    xor a0, a0, a2
+; RV64IM-NEXT:    ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i64 @llvm.clmul.i64(i64 %a, i64 %b)
+  ret i64 %res
+}
+
+define i4 @clmul_constfold_i4() nounwind {
+; CHECK-LABEL: clmul_constfold_i4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 2
+; CHECK-NEXT:    ret
+  %res = call i4 @llvm.clmul.i4(i4 1, i4 2)
+  ret i4 %res
+}
+
+define i16 @clmul_constfold_i16() nounwind {
+; RV32IM-LABEL: clmul_constfold_i16:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    lui a0, 699051
+; RV32IM-NEXT:    addi a0, a0, -1366
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmul_constfold_i16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a0, %hi(.LCPI6_0)
+; RV64IM-NEXT:    ld a0, %lo(.LCPI6_0)(a0)
+; RV64IM-NEXT:    ret
+  %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1)
+  ret i16 %res
+}
+
+define i4 @clmulr_i4(i4 %a, i4 %b) nounwind {
+; RV32IM-LABEL: clmulr_i4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli a3, a0, 8
+; RV32IM-NEXT:    lui s9, 16
+; RV32IM-NEXT:    srli a4, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui a7, 61681
+; RV32IM-NEXT:    lui ra, 209715
+; RV32IM-NEXT:    lui a1, 349525
+; RV32IM-NEXT:    li s0, 1
+; RV32IM-NEXT:    lui t1, 1
+; RV32IM-NEXT:    lui t2, 2
+; RV32IM-NEXT:    lui t3, 4
+; RV32IM-NEXT:    lui t4, 8
+; RV32IM-NEXT:    lui t0, 32
+; RV32IM-NEXT:    lui a6, 64
+; RV32IM-NEXT:    lui a5, 128
+; RV32IM-NEXT:    lui s1, 256
+; RV32IM-NEXT:    lui t5, 512
+; RV32IM-NEXT:    lui t6, 1024
+; RV32IM-NEXT:    lui s4, 2048
+; RV32IM-NEXT:    lui s2, 4096
+; RV32IM-NEXT:    lui s3, 8192
+; RV32IM-NEXT:    lui s7, 16384
+; RV32IM-NEXT:    lui s5, 32768
+; RV32IM-NEXT:    lui s6, 65536
+; RV32IM-NEXT:    lui s11, 131072
+; RV32IM-NEXT:    lui s8, 262144
+; RV32IM-NEXT:    addi s10, s9, -256
+; RV32IM-NEXT:    and a3, a3, s10
+; RV32IM-NEXT:    or a3, a3, a4
+; RV32IM-NEXT:    addi a7, a7, -241
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a4, ra, 819
+; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a1, a1, 1365
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    slli s0, s0, 11
+; RV32IM-NEXT:    and a0, a0, s10
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    srli a2, a0, 4
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and a2, a2, a7
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 2
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 1
+; RV32IM-NEXT:    and a0, a0, a1
+; RV32IM-NEXT:    and a2, a2, a1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, a2, a0
+; RV32IM-NEXT:    andi a0, a3, 2
+; RV32IM-NEXT:    andi a1, a3, 1
+; RV32IM-NEXT:    and a4, a3, s0
+; RV32IM-NEXT:    and a7, a3, t1
+; RV32IM-NEXT:    and s0, a3, t2
+; RV32IM-NEXT:    and ra, a3, t3
+; RV32IM-NEXT:    and a2, a3, t4
+; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s9
+; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t0
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a6, a3, a6
+; RV32IM-NEXT:    and a5, a3, a5
+; RV32IM-NEXT:    and s1, a3, s1
+; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t5
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t6, a3, t6
+; RV32IM-NEXT:    and a2, a3, s4
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a3, s2
+; RV32IM-NEXT:    and a2, a3, s3
+; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s7
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s5
+; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s6
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s11
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s8
+; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a2, 524288
+; RV32IM-NEXT:    and a2, a3, a2
+; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a0, a3, 4
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a3, 8
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a2, a3, 16
+; RV32IM-NEXT:    mul s9, a3, a2
+; RV32IM-NEXT:    andi t0, a3, 32
+; RV32IM-NEXT:    mul s6, a3, t0
+; RV32IM-NEXT:    andi t1, a3, 64
+; RV32IM-NEXT:    mul a0, a3, t1
+; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 128
+; RV32IM-NEXT:    mul a0, a3, t2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 256
+; RV32IM-NEXT:    mul s1, a3, t2
+; RV32IM-NEXT:    andi t3, a3, 512
+; RV32IM-NEXT:    mul t5, a3, t3
+; RV32IM-NEXT:    andi t4, a3, 1024
+; RV32IM-NEXT:    mul s5, a3, t4
+; RV32IM-NEXT:    mul s8, a3, a4
+; RV32IM-NEXT:    mul a0, a3, a7
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t2, a3, s0
+; RV32IM-NEXT:    mul a7, a3, ra
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a3, a0
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    mul a0, a3, a6
+; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a6, a3, a5
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    mul t4, a3, t6
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a3, a0
+; RV32IM-NEXT:    mul a2, a3, s2
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a0
+; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, a0
+; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a3, a0
+; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, a0
+; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, a0
+; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, s11, a0
+; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s11, s11, ra
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor t5, s1, t5
+; RV32IM-NEXT:    xor a7, t2, a7
+; RV32IM-NEXT:    xor a4, a6, a4
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, s11
+; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, s6, a2
+; RV32IM-NEXT:    xor a6, t5, s5
+; RV32IM-NEXT:    xor a7, a7, s0
+; RV32IM-NEXT:    xor a4, a4, t1
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    xor a0, a0, a2
+; RV32IM-NEXT:    xor a2, a6, s8
+; RV32IM-NEXT:    xor a5, a7, s4
+; RV32IM-NEXT:    xor a4, a4, t4
+; RV32IM-NEXT:    xor a1, a1, t0
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a5, a5, s7
+; RV32IM-NEXT:    xor a4, a4, s3
+; RV32IM-NEXT:    xor a1, a1, t3
+; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a1, a1, t6
+; RV32IM-NEXT:    xor a2, a0, a2
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    xor a1, a1, s2
+; RV32IM-NEXT:    xor a2, a2, a4
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    and a3, a2, s10
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s10
+; RV32IM-NEXT:    srli a1, a1, 24
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a2, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li a3, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s3, 16
+; RV64IM-NEXT:    srli s0, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli t0, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t4, 209715
+; RV64IM-NEXT:    lui t6, 349525
+; RV64IM-NEXT:    li a7, 1
+; RV64IM-NEXT:    lui s5, 2
+; RV64IM-NEXT:    lui t1, 4
+; RV64IM-NEXT:    lui a4, 128
+; RV64IM-NEXT:    lui s7, 256
+; RV64IM-NEXT:    lui s8, 4096
+; RV64IM-NEXT:    lui s10, 8192
+; RV64IM-NEXT:    lui a1, 4080
+; RV64IM-NEXT:    and a2, a2, a1
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    addi s1, s3, -256
+; RV64IM-NEXT:    and t5, a0, a1
+; RV64IM-NEXT:    slli a1, t2, 32
+; RV64IM-NEXT:    addi s9, t3, -241
+; RV64IM-NEXT:    addi t4, t4, 819
+; RV64IM-NEXT:    addi t2, t6, 1365
+; RV64IM-NEXT:    slli t3, a7, 11
+; RV64IM-NEXT:    slli s11, a7, 32
+; RV64IM-NEXT:    slli ra, a7, 33
+; RV64IM-NEXT:    slli t6, a7, 34
+; RV64IM-NEXT:    slli s2, a7, 35
+; RV64IM-NEXT:    slli s4, a7, 36
+; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a6, a3
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    slli a3, a7, 37
+; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a5, s1
+; RV64IM-NEXT:    or a3, a3, s0
+; RV64IM-NEXT:    slli a5, a7, 38
+; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, t5, 24
+; RV64IM-NEXT:    and a0, a0, s1
+; RV64IM-NEXT:    or a1, t5, a1
+; RV64IM-NEXT:    slli a5, s9, 32
+; RV64IM-NEXT:    add a5, s9, a5
+; RV64IM-NEXT:    slli s0, t4, 32
+; RV64IM-NEXT:    add t4, t4, s0
+; RV64IM-NEXT:    slli s4, t2, 32
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    add t2, t2, s4
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a0, t0, a0
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t4
+; RV64IM-NEXT:    and a1, a1, t4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t2
+; RV64IM-NEXT:    and a1, a1, t2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or t0, a1, a0
+; RV64IM-NEXT:    andi a0, t0, 2
+; RV64IM-NEXT:    andi a1, t0, 1
+; RV64IM-NEXT:    andi a2, t0, 4
+; RV64IM-NEXT:    andi a3, t0, 8
+; RV64IM-NEXT:    andi a5, t0, 16
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 32
+; RV64IM-NEXT:    mul a1, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a3
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, t0, 256
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a2, a0
+; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 512
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a7, 39
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 40
+; RV64IM-NEXT:    and a1, t0, a4
+; RV64IM-NEXT:    and a2, t0, s7
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 41
+; RV64IM-NEXT:    and a2, t0, s8
+; RV64IM-NEXT:    and a3, t0, s10
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 48
+; RV64IM-NEXT:    and a3, t0, s11
+; RV64IM-NEXT:    and a4, t0, ra
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    mul a4, t0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a7, 49
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 56
+; RV64IM-NEXT:    and a1, t0, a2
+; RV64IM-NEXT:    and a2, t0, a3
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 57
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 42
+; RV64IM-NEXT:    slli ra, a7, 43
+; RV64IM-NEXT:    slli a3, a7, 44
+; RV64IM-NEXT:    slli a4, a7, 45
+; RV64IM-NEXT:    slli t5, a7, 46
+; RV64IM-NEXT:    slli s0, a7, 47
+; RV64IM-NEXT:    slli s1, a7, 50
+; RV64IM-NEXT:    slli a0, a7, 51
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 52
+; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 53
+; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 54
+; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 55
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 58
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 59
+; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 60
+; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 61
+; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, a7, 62
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, t3
+; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    and a0, t0, s7
+; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 8
+; RV64IM-NEXT:    and a0, t0, s8
+; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s3
+; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s6, 32
+; RV64IM-NEXT:    and a0, t0, s6
+; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 64
+; RV64IM-NEXT:    and a0, t0, s10
+; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 512
+; RV64IM-NEXT:    and a0, t0, s11
+; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s4, 1024
+; RV64IM-NEXT:    and a0, t0, s4
+; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s5, 2048
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 16384
+; RV64IM-NEXT:    and a0, t0, s9
+; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a5, 32768
+; RV64IM-NEXT:    and a5, t0, a5
+; RV64IM-NEXT:    lui a6, 65536
+; RV64IM-NEXT:    and a6, t0, a6
+; RV64IM-NEXT:    lui t1, 131072
+; RV64IM-NEXT:    and t1, t0, t1
+; RV64IM-NEXT:    lui t2, 262144
+; RV64IM-NEXT:    and t2, t0, t2
+; RV64IM-NEXT:    and a0, t0, t6
+; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s2
+; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, t4
+; RV64IM-NEXT:    and a7, t0, a2
+; RV64IM-NEXT:    and ra, t0, ra
+; RV64IM-NEXT:    and t3, t0, a3
+; RV64IM-NEXT:    and t4, t0, a4
+; RV64IM-NEXT:    and t5, t0, t5
+; RV64IM-NEXT:    and t6, t0, s0
+; RV64IM-NEXT:    and s0, t0, s1
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, t0, a2
+; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, t0, a2
+; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, t0, a2
+; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, t0, a2
+; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, t0, a2
+; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, t0, a2
+; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, t0, a2
+; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, t0, a2
+; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, t0, a2
+; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, t0, a2
+; RV64IM-NEXT:    andi s11, t0, 64
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 128
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 1024
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, t0, a2
+; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, t0, a2
+; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, a6
+; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t1
+; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t2
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srliw t2, t0, 31
+; RV64IM-NEXT:    slli t2, t2, 31
+; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, t0, a5
+; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, t0, a6
+; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, t0, a6
+; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a1
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, t0, a7
+; RV64IM-NEXT:    mul ra, t0, ra
+; RV64IM-NEXT:    mul a6, t0, t3
+; RV64IM-NEXT:    mul t4, t0, t4
+; RV64IM-NEXT:    mul t5, t0, t5
+; RV64IM-NEXT:    mul a0, t0, t6
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t6, t0, s0
+; RV64IM-NEXT:    mul s0, t0, s1
+; RV64IM-NEXT:    mul s1, t0, s2
+; RV64IM-NEXT:    mul s2, t0, s3
+; RV64IM-NEXT:    mul s3, t0, s4
+; RV64IM-NEXT:    mul s4, t0, s5
+; RV64IM-NEXT:    mul s5, t0, s6
+; RV64IM-NEXT:    mul s6, t0, s7
+; RV64IM-NEXT:    mul s7, t0, s8
+; RV64IM-NEXT:    mul s8, t0, s9
+; RV64IM-NEXT:    mul s9, t0, s10
+; RV64IM-NEXT:    srli s10, t0, 63
+; RV64IM-NEXT:    slli s10, s10, 63
+; RV64IM-NEXT:    mul t2, t0, t2
+; RV64IM-NEXT:    mul t0, t0, s10
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, a0, a1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s11, t3, s11
+; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, t3, a4
+; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, t3, a3
+; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, t3, a2
+; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, t3, a7
+; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t6, t3, t6
+; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, t3, s5
+; RV64IM-NEXT:    xor a0, s10, a0
+; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, s11, t3
+; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a5, a7, ra
+; RV64IM-NEXT:    xor a7, t6, s0
+; RV64IM-NEXT:    xor t6, s5, s6
+; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, t3
+; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s10, t3
+; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    xor a6, a7, s1
+; RV64IM-NEXT:    xor a7, t6, s7
+; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s0, t1
+; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t3
+; RV64IM-NEXT:    xor a5, a5, t4
+; RV64IM-NEXT:    xor a6, a6, s2
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, t5
+; RV64IM-NEXT:    xor a6, a6, s3
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a4
+; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a6, s4
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    xor a6, a7, t0
+; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a1, t0
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    or a0, a0, a7
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a2, a1, a7
+; RV64IM-NEXT:    xor a4, a1, a4
+; RV64IM-NEXT:    srli a1, a1, 8
+; RV64IM-NEXT:    slli a2, a2, 24
+; RV64IM-NEXT:    xor a5, a4, a5
+; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    srli a4, a4, 24
+; RV64IM-NEXT:    srliw a3, a5, 24
+; RV64IM-NEXT:    and a4, a4, a7
+; RV64IM-NEXT:    srli a7, a5, 40
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    slli a3, a3, 32
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    and a4, a7, t0
+; RV64IM-NEXT:    srli a5, a5, 56
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a4, a4, a5
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i4 @llvm.clmulr.i4(i4 %a, i4 %b)
+  ret i4 %res
+}
+
+define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
+; RV32IM-LABEL: clmulr_i8:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli a3, a0, 8
+; RV32IM-NEXT:    lui s9, 16
+; RV32IM-NEXT:    srli a4, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui a7, 61681
+; RV32IM-NEXT:    lui ra, 209715
+; RV32IM-NEXT:    lui a1, 349525
+; RV32IM-NEXT:    li s0, 1
+; RV32IM-NEXT:    lui t1, 1
+; RV32IM-NEXT:    lui t2, 2
+; RV32IM-NEXT:    lui t3, 4
+; RV32IM-NEXT:    lui t4, 8
+; RV32IM-NEXT:    lui t0, 32
+; RV32IM-NEXT:    lui a6, 64
+; RV32IM-NEXT:    lui a5, 128
+; RV32IM-NEXT:    lui s1, 256
+; RV32IM-NEXT:    lui t5, 512
+; RV32IM-NEXT:    lui t6, 1024
+; RV32IM-NEXT:    lui s4, 2048
+; RV32IM-NEXT:    lui s2, 4096
+; RV32IM-NEXT:    lui s3, 8192
+; RV32IM-NEXT:    lui s7, 16384
+; RV32IM-NEXT:    lui s5, 32768
+; RV32IM-NEXT:    lui s6, 65536
+; RV32IM-NEXT:    lui s11, 131072
+; RV32IM-NEXT:    lui s8, 262144
+; RV32IM-NEXT:    addi s10, s9, -256
+; RV32IM-NEXT:    and a3, a3, s10
+; RV32IM-NEXT:    or a3, a3, a4
+; RV32IM-NEXT:    addi a7, a7, -241
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a4, ra, 819
+; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a1, a1, 1365
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    slli s0, s0, 11
+; RV32IM-NEXT:    and a0, a0, s10
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    srli a2, a0, 4
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and a2, a2, a7
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 2
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 1
+; RV32IM-NEXT:    and a0, a0, a1
+; RV32IM-NEXT:    and a2, a2, a1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, a2, a0
+; RV32IM-NEXT:    andi a0, a3, 2
+; RV32IM-NEXT:    andi a1, a3, 1
+; RV32IM-NEXT:    and a4, a3, s0
+; RV32IM-NEXT:    and a7, a3, t1
+; RV32IM-NEXT:    and s0, a3, t2
+; RV32IM-NEXT:    and ra, a3, t3
+; RV32IM-NEXT:    and a2, a3, t4
+; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s9
+; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t0
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a6, a3, a6
+; RV32IM-NEXT:    and a5, a3, a5
+; RV32IM-NEXT:    and s1, a3, s1
+; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t5
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t6, a3, t6
+; RV32IM-NEXT:    and a2, a3, s4
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a3, s2
+; RV32IM-NEXT:    and a2, a3, s3
+; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s7
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s5
+; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s6
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s11
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s8
+; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a2, 524288
+; RV32IM-NEXT:    and a2, a3, a2
+; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a0, a3, 4
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a3, 8
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a2, a3, 16
+; RV32IM-NEXT:    mul s9, a3, a2
+; RV32IM-NEXT:    andi t0, a3, 32
+; RV32IM-NEXT:    mul s6, a3, t0
+; RV32IM-NEXT:    andi t1, a3, 64
+; RV32IM-NEXT:    mul a0, a3, t1
+; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 128
+; RV32IM-NEXT:    mul a0, a3, t2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 256
+; RV32IM-NEXT:    mul s1, a3, t2
+; RV32IM-NEXT:    andi t3, a3, 512
+; RV32IM-NEXT:    mul t5, a3, t3
+; RV32IM-NEXT:    andi t4, a3, 1024
+; RV32IM-NEXT:    mul s5, a3, t4
+; RV32IM-NEXT:    mul s8, a3, a4
+; RV32IM-NEXT:    mul a0, a3, a7
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t2, a3, s0
+; RV32IM-NEXT:    mul a7, a3, ra
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a3, a0
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    mul a0, a3, a6
+; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a6, a3, a5
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    mul t4, a3, t6
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a3, a0
+; RV32IM-NEXT:    mul a2, a3, s2
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a0
+; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, a0
+; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a3, a0
+; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, a0
+; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, a0
+; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, s11, a0
+; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s11, s11, ra
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor t5, s1, t5
+; RV32IM-NEXT:    xor a7, t2, a7
+; RV32IM-NEXT:    xor a4, a6, a4
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, s11
+; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, s6, a2
+; RV32IM-NEXT:    xor a6, t5, s5
+; RV32IM-NEXT:    xor a7, a7, s0
+; RV32IM-NEXT:    xor a4, a4, t1
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    xor a0, a0, a2
+; RV32IM-NEXT:    xor a2, a6, s8
+; RV32IM-NEXT:    xor a5, a7, s4
+; RV32IM-NEXT:    xor a4, a4, t4
+; RV32IM-NEXT:    xor a1, a1, t0
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a5, a5, s7
+; RV32IM-NEXT:    xor a4, a4, s3
+; RV32IM-NEXT:    xor a1, a1, t3
+; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a1, a1, t6
+; RV32IM-NEXT:    xor a2, a0, a2
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    xor a1, a1, s2
+; RV32IM-NEXT:    xor a2, a2, a4
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    and a3, a2, s10
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s10
+; RV32IM-NEXT:    srli a1, a1, 24
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a2, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li a3, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s3, 16
+; RV64IM-NEXT:    srli s0, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli t0, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t4, 209715
+; RV64IM-NEXT:    lui t6, 349525
+; RV64IM-NEXT:    li a7, 1
+; RV64IM-NEXT:    lui s5, 2
+; RV64IM-NEXT:    lui t1, 4
+; RV64IM-NEXT:    lui a4, 128
+; RV64IM-NEXT:    lui s7, 256
+; RV64IM-NEXT:    lui s8, 4096
+; RV64IM-NEXT:    lui s10, 8192
+; RV64IM-NEXT:    lui a1, 4080
+; RV64IM-NEXT:    and a2, a2, a1
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    addi s1, s3, -256
+; RV64IM-NEXT:    and t5, a0, a1
+; RV64IM-NEXT:    slli a1, t2, 32
+; RV64IM-NEXT:    addi s9, t3, -241
+; RV64IM-NEXT:    addi t4, t4, 819
+; RV64IM-NEXT:    addi t2, t6, 1365
+; RV64IM-NEXT:    slli t3, a7, 11
+; RV64IM-NEXT:    slli s11, a7, 32
+; RV64IM-NEXT:    slli ra, a7, 33
+; RV64IM-NEXT:    slli t6, a7, 34
+; RV64IM-NEXT:    slli s2, a7, 35
+; RV64IM-NEXT:    slli s4, a7, 36
+; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a6, a3
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    slli a3, a7, 37
+; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a5, s1
+; RV64IM-NEXT:    or a3, a3, s0
+; RV64IM-NEXT:    slli a5, a7, 38
+; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, t5, 24
+; RV64IM-NEXT:    and a0, a0, s1
+; RV64IM-NEXT:    or a1, t5, a1
+; RV64IM-NEXT:    slli a5, s9, 32
+; RV64IM-NEXT:    add a5, s9, a5
+; RV64IM-NEXT:    slli s0, t4, 32
+; RV64IM-NEXT:    add t4, t4, s0
+; RV64IM-NEXT:    slli s4, t2, 32
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    add t2, t2, s4
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a0, t0, a0
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t4
+; RV64IM-NEXT:    and a1, a1, t4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t2
+; RV64IM-NEXT:    and a1, a1, t2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or t0, a1, a0
+; RV64IM-NEXT:    andi a0, t0, 2
+; RV64IM-NEXT:    andi a1, t0, 1
+; RV64IM-NEXT:    andi a2, t0, 4
+; RV64IM-NEXT:    andi a3, t0, 8
+; RV64IM-NEXT:    andi a5, t0, 16
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 32
+; RV64IM-NEXT:    mul a1, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a3
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, t0, 256
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a2, a0
+; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 512
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a7, 39
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 40
+; RV64IM-NEXT:    and a1, t0, a4
+; RV64IM-NEXT:    and a2, t0, s7
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 41
+; RV64IM-NEXT:    and a2, t0, s8
+; RV64IM-NEXT:    and a3, t0, s10
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 48
+; RV64IM-NEXT:    and a3, t0, s11
+; RV64IM-NEXT:    and a4, t0, ra
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    mul a4, t0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a7, 49
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 56
+; RV64IM-NEXT:    and a1, t0, a2
+; RV64IM-NEXT:    and a2, t0, a3
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 57
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 42
+; RV64IM-NEXT:    slli ra, a7, 43
+; RV64IM-NEXT:    slli a3, a7, 44
+; RV64IM-NEXT:    slli a4, a7, 45
+; RV64IM-NEXT:    slli t5, a7, 46
+; RV64IM-NEXT:    slli s0, a7, 47
+; RV64IM-NEXT:    slli s1, a7, 50
+; RV64IM-NEXT:    slli a0, a7, 51
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 52
+; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 53
+; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 54
+; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 55
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 58
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 59
+; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 60
+; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 61
+; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, a7, 62
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, t3
+; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    and a0, t0, s7
+; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 8
+; RV64IM-NEXT:    and a0, t0, s8
+; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s3
+; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s6, 32
+; RV64IM-NEXT:    and a0, t0, s6
+; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 64
+; RV64IM-NEXT:    and a0, t0, s10
+; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 512
+; RV64IM-NEXT:    and a0, t0, s11
+; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s4, 1024
+; RV64IM-NEXT:    and a0, t0, s4
+; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s5, 2048
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 16384
+; RV64IM-NEXT:    and a0, t0, s9
+; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a5, 32768
+; RV64IM-NEXT:    and a5, t0, a5
+; RV64IM-NEXT:    lui a6, 65536
+; RV64IM-NEXT:    and a6, t0, a6
+; RV64IM-NEXT:    lui t1, 131072
+; RV64IM-NEXT:    and t1, t0, t1
+; RV64IM-NEXT:    lui t2, 262144
+; RV64IM-NEXT:    and t2, t0, t2
+; RV64IM-NEXT:    and a0, t0, t6
+; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s2
+; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, t4
+; RV64IM-NEXT:    and a7, t0, a2
+; RV64IM-NEXT:    and ra, t0, ra
+; RV64IM-NEXT:    and t3, t0, a3
+; RV64IM-NEXT:    and t4, t0, a4
+; RV64IM-NEXT:    and t5, t0, t5
+; RV64IM-NEXT:    and t6, t0, s0
+; RV64IM-NEXT:    and s0, t0, s1
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, t0, a2
+; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, t0, a2
+; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, t0, a2
+; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, t0, a2
+; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, t0, a2
+; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, t0, a2
+; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, t0, a2
+; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, t0, a2
+; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, t0, a2
+; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, t0, a2
+; RV64IM-NEXT:    andi s11, t0, 64
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 128
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 1024
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, t0, a2
+; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, t0, a2
+; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, a6
+; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t1
+; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t2
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srliw t2, t0, 31
+; RV64IM-NEXT:    slli t2, t2, 31
+; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, t0, a5
+; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, t0, a6
+; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, t0, a6
+; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a1
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, t0, a7
+; RV64IM-NEXT:    mul ra, t0, ra
+; RV64IM-NEXT:    mul a6, t0, t3
+; RV64IM-NEXT:    mul t4, t0, t4
+; RV64IM-NEXT:    mul t5, t0, t5
+; RV64IM-NEXT:    mul a0, t0, t6
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t6, t0, s0
+; RV64IM-NEXT:    mul s0, t0, s1
+; RV64IM-NEXT:    mul s1, t0, s2
+; RV64IM-NEXT:    mul s2, t0, s3
+; RV64IM-NEXT:    mul s3, t0, s4
+; RV64IM-NEXT:    mul s4, t0, s5
+; RV64IM-NEXT:    mul s5, t0, s6
+; RV64IM-NEXT:    mul s6, t0, s7
+; RV64IM-NEXT:    mul s7, t0, s8
+; RV64IM-NEXT:    mul s8, t0, s9
+; RV64IM-NEXT:    mul s9, t0, s10
+; RV64IM-NEXT:    srli s10, t0, 63
+; RV64IM-NEXT:    slli s10, s10, 63
+; RV64IM-NEXT:    mul t2, t0, t2
+; RV64IM-NEXT:    mul t0, t0, s10
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, a0, a1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s11, t3, s11
+; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, t3, a4
+; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, t3, a3
+; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, t3, a2
+; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, t3, a7
+; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t6, t3, t6
+; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, t3, s5
+; RV64IM-NEXT:    xor a0, s10, a0
+; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, s11, t3
+; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a5, a7, ra
+; RV64IM-NEXT:    xor a7, t6, s0
+; RV64IM-NEXT:    xor t6, s5, s6
+; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, t3
+; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s10, t3
+; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    xor a6, a7, s1
+; RV64IM-NEXT:    xor a7, t6, s7
+; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s0, t1
+; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t3
+; RV64IM-NEXT:    xor a5, a5, t4
+; RV64IM-NEXT:    xor a6, a6, s2
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, t5
+; RV64IM-NEXT:    xor a6, a6, s3
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a4
+; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a6, s4
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    xor a6, a7, t0
+; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a1, t0
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    or a0, a0, a7
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a2, a1, a7
+; RV64IM-NEXT:    xor a4, a1, a4
+; RV64IM-NEXT:    srli a1, a1, 8
+; RV64IM-NEXT:    slli a2, a2, 24
+; RV64IM-NEXT:    xor a5, a4, a5
+; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    srli a4, a4, 24
+; RV64IM-NEXT:    srliw a3, a5, 24
+; RV64IM-NEXT:    and a4, a4, a7
+; RV64IM-NEXT:    srli a7, a5, 40
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    slli a3, a3, 32
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    and a4, a7, t0
+; RV64IM-NEXT:    srli a5, a5, 56
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a4, a4, a5
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i8 @llvm.clmulr.i8(i8 %a, i8 %b)
+  ret i8 %res
+}
+
+define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
+; RV32IM-LABEL: clmulr_i16:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli a3, a0, 8
+; RV32IM-NEXT:    lui s9, 16
+; RV32IM-NEXT:    srli a4, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui a7, 61681
+; RV32IM-NEXT:    lui ra, 209715
+; RV32IM-NEXT:    lui a1, 349525
+; RV32IM-NEXT:    li s0, 1
+; RV32IM-NEXT:    lui t1, 1
+; RV32IM-NEXT:    lui t2, 2
+; RV32IM-NEXT:    lui t3, 4
+; RV32IM-NEXT:    lui t4, 8
+; RV32IM-NEXT:    lui t0, 32
+; RV32IM-NEXT:    lui a6, 64
+; RV32IM-NEXT:    lui a5, 128
+; RV32IM-NEXT:    lui s1, 256
+; RV32IM-NEXT:    lui t5, 512
+; RV32IM-NEXT:    lui t6, 1024
+; RV32IM-NEXT:    lui s4, 2048
+; RV32IM-NEXT:    lui s2, 4096
+; RV32IM-NEXT:    lui s3, 8192
+; RV32IM-NEXT:    lui s7, 16384
+; RV32IM-NEXT:    lui s5, 32768
+; RV32IM-NEXT:    lui s6, 65536
+; RV32IM-NEXT:    lui s11, 131072
+; RV32IM-NEXT:    lui s8, 262144
+; RV32IM-NEXT:    addi s10, s9, -256
+; RV32IM-NEXT:    and a3, a3, s10
+; RV32IM-NEXT:    or a3, a3, a4
+; RV32IM-NEXT:    addi a7, a7, -241
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a4, ra, 819
+; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a1, a1, 1365
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    slli s0, s0, 11
+; RV32IM-NEXT:    and a0, a0, s10
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    srli a2, a0, 4
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and a2, a2, a7
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 2
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 1
+; RV32IM-NEXT:    and a0, a0, a1
+; RV32IM-NEXT:    and a2, a2, a1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, a2, a0
+; RV32IM-NEXT:    andi a0, a3, 2
+; RV32IM-NEXT:    andi a1, a3, 1
+; RV32IM-NEXT:    and a4, a3, s0
+; RV32IM-NEXT:    and a7, a3, t1
+; RV32IM-NEXT:    and s0, a3, t2
+; RV32IM-NEXT:    and ra, a3, t3
+; RV32IM-NEXT:    and a2, a3, t4
+; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s9
+; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t0
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a6, a3, a6
+; RV32IM-NEXT:    and a5, a3, a5
+; RV32IM-NEXT:    and s1, a3, s1
+; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t5
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t6, a3, t6
+; RV32IM-NEXT:    and a2, a3, s4
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a3, s2
+; RV32IM-NEXT:    and a2, a3, s3
+; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s7
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s5
+; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s6
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s11
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s8
+; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a2, 524288
+; RV32IM-NEXT:    and a2, a3, a2
+; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a0, a3, 4
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a3, 8
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a2, a3, 16
+; RV32IM-NEXT:    mul s9, a3, a2
+; RV32IM-NEXT:    andi t0, a3, 32
+; RV32IM-NEXT:    mul s6, a3, t0
+; RV32IM-NEXT:    andi t1, a3, 64
+; RV32IM-NEXT:    mul a0, a3, t1
+; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 128
+; RV32IM-NEXT:    mul a0, a3, t2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 256
+; RV32IM-NEXT:    mul s1, a3, t2
+; RV32IM-NEXT:    andi t3, a3, 512
+; RV32IM-NEXT:    mul t5, a3, t3
+; RV32IM-NEXT:    andi t4, a3, 1024
+; RV32IM-NEXT:    mul s5, a3, t4
+; RV32IM-NEXT:    mul s8, a3, a4
+; RV32IM-NEXT:    mul a0, a3, a7
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t2, a3, s0
+; RV32IM-NEXT:    mul a7, a3, ra
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a3, a0
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    mul a0, a3, a6
+; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a6, a3, a5
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    mul t4, a3, t6
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a3, a0
+; RV32IM-NEXT:    mul a2, a3, s2
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a0
+; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, a0
+; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a3, a0
+; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, a0
+; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, a0
+; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, s11, a0
+; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s11, s11, ra
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor t5, s1, t5
+; RV32IM-NEXT:    xor a7, t2, a7
+; RV32IM-NEXT:    xor a4, a6, a4
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, s11
+; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, s6, a2
+; RV32IM-NEXT:    xor a6, t5, s5
+; RV32IM-NEXT:    xor a7, a7, s0
+; RV32IM-NEXT:    xor a4, a4, t1
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    xor a0, a0, a2
+; RV32IM-NEXT:    xor a2, a6, s8
+; RV32IM-NEXT:    xor a5, a7, s4
+; RV32IM-NEXT:    xor a4, a4, t4
+; RV32IM-NEXT:    xor a1, a1, t0
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a5, a5, s7
+; RV32IM-NEXT:    xor a4, a4, s3
+; RV32IM-NEXT:    xor a1, a1, t3
+; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a1, a1, t6
+; RV32IM-NEXT:    xor a2, a0, a2
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    xor a1, a1, s2
+; RV32IM-NEXT:    xor a2, a2, a4
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    and a3, a2, s10
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s10
+; RV32IM-NEXT:    srli a1, a1, 24
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a2, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li a3, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s3, 16
+; RV64IM-NEXT:    srli s0, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli t0, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t4, 209715
+; RV64IM-NEXT:    lui t6, 349525
+; RV64IM-NEXT:    li a7, 1
+; RV64IM-NEXT:    lui s5, 2
+; RV64IM-NEXT:    lui t1, 4
+; RV64IM-NEXT:    lui a4, 128
+; RV64IM-NEXT:    lui s7, 256
+; RV64IM-NEXT:    lui s8, 4096
+; RV64IM-NEXT:    lui s10, 8192
+; RV64IM-NEXT:    lui a1, 4080
+; RV64IM-NEXT:    and a2, a2, a1
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    addi s1, s3, -256
+; RV64IM-NEXT:    and t5, a0, a1
+; RV64IM-NEXT:    slli a1, t2, 32
+; RV64IM-NEXT:    addi s9, t3, -241
+; RV64IM-NEXT:    addi t4, t4, 819
+; RV64IM-NEXT:    addi t2, t6, 1365
+; RV64IM-NEXT:    slli t3, a7, 11
+; RV64IM-NEXT:    slli s11, a7, 32
+; RV64IM-NEXT:    slli ra, a7, 33
+; RV64IM-NEXT:    slli t6, a7, 34
+; RV64IM-NEXT:    slli s2, a7, 35
+; RV64IM-NEXT:    slli s4, a7, 36
+; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a6, a3
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    slli a3, a7, 37
+; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a5, s1
+; RV64IM-NEXT:    or a3, a3, s0
+; RV64IM-NEXT:    slli a5, a7, 38
+; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, t5, 24
+; RV64IM-NEXT:    and a0, a0, s1
+; RV64IM-NEXT:    or a1, t5, a1
+; RV64IM-NEXT:    slli a5, s9, 32
+; RV64IM-NEXT:    add a5, s9, a5
+; RV64IM-NEXT:    slli s0, t4, 32
+; RV64IM-NEXT:    add t4, t4, s0
+; RV64IM-NEXT:    slli s4, t2, 32
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    add t2, t2, s4
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a0, t0, a0
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t4
+; RV64IM-NEXT:    and a1, a1, t4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t2
+; RV64IM-NEXT:    and a1, a1, t2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or t0, a1, a0
+; RV64IM-NEXT:    andi a0, t0, 2
+; RV64IM-NEXT:    andi a1, t0, 1
+; RV64IM-NEXT:    andi a2, t0, 4
+; RV64IM-NEXT:    andi a3, t0, 8
+; RV64IM-NEXT:    andi a5, t0, 16
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 32
+; RV64IM-NEXT:    mul a1, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a3
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, t0, 256
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a2, a0
+; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 512
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a7, 39
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 40
+; RV64IM-NEXT:    and a1, t0, a4
+; RV64IM-NEXT:    and a2, t0, s7
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 41
+; RV64IM-NEXT:    and a2, t0, s8
+; RV64IM-NEXT:    and a3, t0, s10
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 48
+; RV64IM-NEXT:    and a3, t0, s11
+; RV64IM-NEXT:    and a4, t0, ra
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    mul a4, t0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a7, 49
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 56
+; RV64IM-NEXT:    and a1, t0, a2
+; RV64IM-NEXT:    and a2, t0, a3
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 57
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 42
+; RV64IM-NEXT:    slli ra, a7, 43
+; RV64IM-NEXT:    slli a3, a7, 44
+; RV64IM-NEXT:    slli a4, a7, 45
+; RV64IM-NEXT:    slli t5, a7, 46
+; RV64IM-NEXT:    slli s0, a7, 47
+; RV64IM-NEXT:    slli s1, a7, 50
+; RV64IM-NEXT:    slli a0, a7, 51
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 52
+; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 53
+; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 54
+; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 55
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 58
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 59
+; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 60
+; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 61
+; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, a7, 62
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, t3
+; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    and a0, t0, s7
+; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 8
+; RV64IM-NEXT:    and a0, t0, s8
+; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s3
+; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s6, 32
+; RV64IM-NEXT:    and a0, t0, s6
+; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 64
+; RV64IM-NEXT:    and a0, t0, s10
+; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 512
+; RV64IM-NEXT:    and a0, t0, s11
+; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s4, 1024
+; RV64IM-NEXT:    and a0, t0, s4
+; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s5, 2048
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 16384
+; RV64IM-NEXT:    and a0, t0, s9
+; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a5, 32768
+; RV64IM-NEXT:    and a5, t0, a5
+; RV64IM-NEXT:    lui a6, 65536
+; RV64IM-NEXT:    and a6, t0, a6
+; RV64IM-NEXT:    lui t1, 131072
+; RV64IM-NEXT:    and t1, t0, t1
+; RV64IM-NEXT:    lui t2, 262144
+; RV64IM-NEXT:    and t2, t0, t2
+; RV64IM-NEXT:    and a0, t0, t6
+; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s2
+; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, t4
+; RV64IM-NEXT:    and a7, t0, a2
+; RV64IM-NEXT:    and ra, t0, ra
+; RV64IM-NEXT:    and t3, t0, a3
+; RV64IM-NEXT:    and t4, t0, a4
+; RV64IM-NEXT:    and t5, t0, t5
+; RV64IM-NEXT:    and t6, t0, s0
+; RV64IM-NEXT:    and s0, t0, s1
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, t0, a2
+; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, t0, a2
+; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, t0, a2
+; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, t0, a2
+; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, t0, a2
+; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, t0, a2
+; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, t0, a2
+; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, t0, a2
+; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, t0, a2
+; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, t0, a2
+; RV64IM-NEXT:    andi s11, t0, 64
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 128
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 1024
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, t0, a2
+; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, t0, a2
+; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, a6
+; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t1
+; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t2
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srliw t2, t0, 31
+; RV64IM-NEXT:    slli t2, t2, 31
+; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, t0, a5
+; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, t0, a6
+; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, t0, a6
+; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a1
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, t0, a7
+; RV64IM-NEXT:    mul ra, t0, ra
+; RV64IM-NEXT:    mul a6, t0, t3
+; RV64IM-NEXT:    mul t4, t0, t4
+; RV64IM-NEXT:    mul t5, t0, t5
+; RV64IM-NEXT:    mul a0, t0, t6
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t6, t0, s0
+; RV64IM-NEXT:    mul s0, t0, s1
+; RV64IM-NEXT:    mul s1, t0, s2
+; RV64IM-NEXT:    mul s2, t0, s3
+; RV64IM-NEXT:    mul s3, t0, s4
+; RV64IM-NEXT:    mul s4, t0, s5
+; RV64IM-NEXT:    mul s5, t0, s6
+; RV64IM-NEXT:    mul s6, t0, s7
+; RV64IM-NEXT:    mul s7, t0, s8
+; RV64IM-NEXT:    mul s8, t0, s9
+; RV64IM-NEXT:    mul s9, t0, s10
+; RV64IM-NEXT:    srli s10, t0, 63
+; RV64IM-NEXT:    slli s10, s10, 63
+; RV64IM-NEXT:    mul t2, t0, t2
+; RV64IM-NEXT:    mul t0, t0, s10
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, a0, a1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s11, t3, s11
+; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, t3, a4
+; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, t3, a3
+; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, t3, a2
+; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, t3, a7
+; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t6, t3, t6
+; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, t3, s5
+; RV64IM-NEXT:    xor a0, s10, a0
+; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, s11, t3
+; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a5, a7, ra
+; RV64IM-NEXT:    xor a7, t6, s0
+; RV64IM-NEXT:    xor t6, s5, s6
+; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, t3
+; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s10, t3
+; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    xor a6, a7, s1
+; RV64IM-NEXT:    xor a7, t6, s7
+; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s0, t1
+; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t3
+; RV64IM-NEXT:    xor a5, a5, t4
+; RV64IM-NEXT:    xor a6, a6, s2
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, t5
+; RV64IM-NEXT:    xor a6, a6, s3
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a4
+; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a6, s4
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    xor a6, a7, t0
+; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a1, t0
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    or a0, a0, a7
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a2, a1, a7
+; RV64IM-NEXT:    xor a4, a1, a4
+; RV64IM-NEXT:    srli a1, a1, 8
+; RV64IM-NEXT:    slli a2, a2, 24
+; RV64IM-NEXT:    xor a5, a4, a5
+; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    srli a4, a4, 24
+; RV64IM-NEXT:    srliw a3, a5, 24
+; RV64IM-NEXT:    and a4, a4, a7
+; RV64IM-NEXT:    srli a7, a5, 40
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    slli a3, a3, 32
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    and a4, a7, t0
+; RV64IM-NEXT:    srli a5, a5, 56
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a4, a4, a5
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i16 @llvm.clmulr.i16(i16 %a, i16 %b)
+  ret i16 %res
+}
+
+define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
+; RV32IM-LABEL: clmulr_i32:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli a3, a0, 8
+; RV32IM-NEXT:    lui s9, 16
+; RV32IM-NEXT:    srli a4, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui a7, 61681
+; RV32IM-NEXT:    lui ra, 209715
+; RV32IM-NEXT:    lui a1, 349525
+; RV32IM-NEXT:    li s0, 1
+; RV32IM-NEXT:    lui t1, 1
+; RV32IM-NEXT:    lui t2, 2
+; RV32IM-NEXT:    lui t3, 4
+; RV32IM-NEXT:    lui t4, 8
+; RV32IM-NEXT:    lui t0, 32
+; RV32IM-NEXT:    lui a6, 64
+; RV32IM-NEXT:    lui a5, 128
+; RV32IM-NEXT:    lui s1, 256
+; RV32IM-NEXT:    lui t5, 512
+; RV32IM-NEXT:    lui t6, 1024
+; RV32IM-NEXT:    lui s4, 2048
+; RV32IM-NEXT:    lui s2, 4096
+; RV32IM-NEXT:    lui s3, 8192
+; RV32IM-NEXT:    lui s7, 16384
+; RV32IM-NEXT:    lui s5, 32768
+; RV32IM-NEXT:    lui s6, 65536
+; RV32IM-NEXT:    lui s11, 131072
+; RV32IM-NEXT:    lui s8, 262144
+; RV32IM-NEXT:    addi s10, s9, -256
+; RV32IM-NEXT:    and a3, a3, s10
+; RV32IM-NEXT:    or a3, a3, a4
+; RV32IM-NEXT:    addi a7, a7, -241
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a4, ra, 819
+; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi a1, a1, 1365
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    slli s0, s0, 11
+; RV32IM-NEXT:    and a0, a0, s10
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    srli a2, a0, 4
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and a2, a2, a7
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 2
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a0, 1
+; RV32IM-NEXT:    and a0, a0, a1
+; RV32IM-NEXT:    and a2, a2, a1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, a2, a0
+; RV32IM-NEXT:    andi a0, a3, 2
+; RV32IM-NEXT:    andi a1, a3, 1
+; RV32IM-NEXT:    and a4, a3, s0
+; RV32IM-NEXT:    and a7, a3, t1
+; RV32IM-NEXT:    and s0, a3, t2
+; RV32IM-NEXT:    and ra, a3, t3
+; RV32IM-NEXT:    and a2, a3, t4
+; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s9
+; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t0
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a6, a3, a6
+; RV32IM-NEXT:    and a5, a3, a5
+; RV32IM-NEXT:    and s1, a3, s1
+; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, t5
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t6, a3, t6
+; RV32IM-NEXT:    and a2, a3, s4
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a3, s2
+; RV32IM-NEXT:    and a2, a3, s3
+; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s7
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s5
+; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s6
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s11
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a3, s8
+; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a2, 524288
+; RV32IM-NEXT:    and a2, a3, a2
+; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a0, a3, 4
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a3, 8
+; RV32IM-NEXT:    mul a0, a3, a1
+; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a2, a3, 16
+; RV32IM-NEXT:    mul s9, a3, a2
+; RV32IM-NEXT:    andi t0, a3, 32
+; RV32IM-NEXT:    mul s6, a3, t0
+; RV32IM-NEXT:    andi t1, a3, 64
+; RV32IM-NEXT:    mul a0, a3, t1
+; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 128
+; RV32IM-NEXT:    mul a0, a3, t2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t2, a3, 256
+; RV32IM-NEXT:    mul s1, a3, t2
+; RV32IM-NEXT:    andi t3, a3, 512
+; RV32IM-NEXT:    mul t5, a3, t3
+; RV32IM-NEXT:    andi t4, a3, 1024
+; RV32IM-NEXT:    mul s5, a3, t4
+; RV32IM-NEXT:    mul s8, a3, a4
+; RV32IM-NEXT:    mul a0, a3, a7
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t2, a3, s0
+; RV32IM-NEXT:    mul a7, a3, ra
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a3, a0
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    mul a0, a3, a6
+; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a6, a3, a5
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    mul t4, a3, t6
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a3, a0
+; RV32IM-NEXT:    mul a2, a3, s2
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a0
+; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, a0
+; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a3, a0
+; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, a0
+; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, a0
+; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, s11, a0
+; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s11, s11, ra
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor t5, s1, t5
+; RV32IM-NEXT:    xor a7, t2, a7
+; RV32IM-NEXT:    xor a4, a6, a4
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, s11
+; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, s6, a2
+; RV32IM-NEXT:    xor a6, t5, s5
+; RV32IM-NEXT:    xor a7, a7, s0
+; RV32IM-NEXT:    xor a4, a4, t1
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    xor a0, a0, a2
+; RV32IM-NEXT:    xor a2, a6, s8
+; RV32IM-NEXT:    xor a5, a7, s4
+; RV32IM-NEXT:    xor a4, a4, t4
+; RV32IM-NEXT:    xor a1, a1, t0
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a5, a5, s7
+; RV32IM-NEXT:    xor a4, a4, s3
+; RV32IM-NEXT:    xor a1, a1, t3
+; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a1, a1, t6
+; RV32IM-NEXT:    xor a2, a0, a2
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    xor a1, a1, s2
+; RV32IM-NEXT:    xor a2, a2, a4
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    and a3, a2, s10
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s10
+; RV32IM-NEXT:    srli a1, a1, 24
+; RV32IM-NEXT:    or a0, a0, a3
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a2
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i32:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a2, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li a3, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s3, 16
+; RV64IM-NEXT:    srli s0, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli t0, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t4, 209715
+; RV64IM-NEXT:    lui t6, 349525
+; RV64IM-NEXT:    li a7, 1
+; RV64IM-NEXT:    lui s5, 2
+; RV64IM-NEXT:    lui t1, 4
+; RV64IM-NEXT:    lui a4, 128
+; RV64IM-NEXT:    lui s7, 256
+; RV64IM-NEXT:    lui s8, 4096
+; RV64IM-NEXT:    lui s10, 8192
+; RV64IM-NEXT:    lui a1, 4080
+; RV64IM-NEXT:    and a2, a2, a1
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    addi s1, s3, -256
+; RV64IM-NEXT:    and t5, a0, a1
+; RV64IM-NEXT:    slli a1, t2, 32
+; RV64IM-NEXT:    addi s9, t3, -241
+; RV64IM-NEXT:    addi t4, t4, 819
+; RV64IM-NEXT:    addi t2, t6, 1365
+; RV64IM-NEXT:    slli t3, a7, 11
+; RV64IM-NEXT:    slli s11, a7, 32
+; RV64IM-NEXT:    slli ra, a7, 33
+; RV64IM-NEXT:    slli t6, a7, 34
+; RV64IM-NEXT:    slli s2, a7, 35
+; RV64IM-NEXT:    slli s4, a7, 36
+; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a6, a3
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    slli a3, a7, 37
+; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a5, s1
+; RV64IM-NEXT:    or a3, a3, s0
+; RV64IM-NEXT:    slli a5, a7, 38
+; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, t5, 24
+; RV64IM-NEXT:    and a0, a0, s1
+; RV64IM-NEXT:    or a1, t5, a1
+; RV64IM-NEXT:    slli a5, s9, 32
+; RV64IM-NEXT:    add a5, s9, a5
+; RV64IM-NEXT:    slli s0, t4, 32
+; RV64IM-NEXT:    add t4, t4, s0
+; RV64IM-NEXT:    slli s4, t2, 32
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    add t2, t2, s4
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a0, t0, a0
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t4
+; RV64IM-NEXT:    and a1, a1, t4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t2
+; RV64IM-NEXT:    and a1, a1, t2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or t0, a1, a0
+; RV64IM-NEXT:    andi a0, t0, 2
+; RV64IM-NEXT:    andi a1, t0, 1
+; RV64IM-NEXT:    andi a2, t0, 4
+; RV64IM-NEXT:    andi a3, t0, 8
+; RV64IM-NEXT:    andi a5, t0, 16
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 32
+; RV64IM-NEXT:    mul a1, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a3
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, t0, 256
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a2, a0
+; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 512
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a7, 39
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 40
+; RV64IM-NEXT:    and a1, t0, a4
+; RV64IM-NEXT:    and a2, t0, s7
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 41
+; RV64IM-NEXT:    and a2, t0, s8
+; RV64IM-NEXT:    and a3, t0, s10
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 48
+; RV64IM-NEXT:    and a3, t0, s11
+; RV64IM-NEXT:    and a4, t0, ra
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    mul a4, t0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a7, 49
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 56
+; RV64IM-NEXT:    and a1, t0, a2
+; RV64IM-NEXT:    and a2, t0, a3
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 57
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 42
+; RV64IM-NEXT:    slli ra, a7, 43
+; RV64IM-NEXT:    slli a3, a7, 44
+; RV64IM-NEXT:    slli a4, a7, 45
+; RV64IM-NEXT:    slli t5, a7, 46
+; RV64IM-NEXT:    slli s0, a7, 47
+; RV64IM-NEXT:    slli s1, a7, 50
+; RV64IM-NEXT:    slli a0, a7, 51
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 52
+; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 53
+; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 54
+; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 55
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 58
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 59
+; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 60
+; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 61
+; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, a7, 62
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, t3
+; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    and a0, t0, s7
+; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 8
+; RV64IM-NEXT:    and a0, t0, s8
+; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s3
+; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s6, 32
+; RV64IM-NEXT:    and a0, t0, s6
+; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 64
+; RV64IM-NEXT:    and a0, t0, s10
+; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 512
+; RV64IM-NEXT:    and a0, t0, s11
+; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s4, 1024
+; RV64IM-NEXT:    and a0, t0, s4
+; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s5, 2048
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 16384
+; RV64IM-NEXT:    and a0, t0, s9
+; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a5, 32768
+; RV64IM-NEXT:    and a5, t0, a5
+; RV64IM-NEXT:    lui a6, 65536
+; RV64IM-NEXT:    and a6, t0, a6
+; RV64IM-NEXT:    lui t1, 131072
+; RV64IM-NEXT:    and t1, t0, t1
+; RV64IM-NEXT:    lui t2, 262144
+; RV64IM-NEXT:    and t2, t0, t2
+; RV64IM-NEXT:    and a0, t0, t6
+; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s2
+; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, t4
+; RV64IM-NEXT:    and a7, t0, a2
+; RV64IM-NEXT:    and ra, t0, ra
+; RV64IM-NEXT:    and t3, t0, a3
+; RV64IM-NEXT:    and t4, t0, a4
+; RV64IM-NEXT:    and t5, t0, t5
+; RV64IM-NEXT:    and t6, t0, s0
+; RV64IM-NEXT:    and s0, t0, s1
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, t0, a2
+; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, t0, a2
+; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, t0, a2
+; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, t0, a2
+; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, t0, a2
+; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, t0, a2
+; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, t0, a2
+; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, t0, a2
+; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, t0, a2
+; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, t0, a2
+; RV64IM-NEXT:    andi s11, t0, 64
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 128
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 1024
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, t0, a2
+; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, t0, a2
+; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, a6
+; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t1
+; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t2
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srliw t2, t0, 31
+; RV64IM-NEXT:    slli t2, t2, 31
+; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, t0, a5
+; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, t0, a6
+; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, t0, a6
+; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a1
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, t0, a7
+; RV64IM-NEXT:    mul ra, t0, ra
+; RV64IM-NEXT:    mul a6, t0, t3
+; RV64IM-NEXT:    mul t4, t0, t4
+; RV64IM-NEXT:    mul t5, t0, t5
+; RV64IM-NEXT:    mul a0, t0, t6
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t6, t0, s0
+; RV64IM-NEXT:    mul s0, t0, s1
+; RV64IM-NEXT:    mul s1, t0, s2
+; RV64IM-NEXT:    mul s2, t0, s3
+; RV64IM-NEXT:    mul s3, t0, s4
+; RV64IM-NEXT:    mul s4, t0, s5
+; RV64IM-NEXT:    mul s5, t0, s6
+; RV64IM-NEXT:    mul s6, t0, s7
+; RV64IM-NEXT:    mul s7, t0, s8
+; RV64IM-NEXT:    mul s8, t0, s9
+; RV64IM-NEXT:    mul s9, t0, s10
+; RV64IM-NEXT:    srli s10, t0, 63
+; RV64IM-NEXT:    slli s10, s10, 63
+; RV64IM-NEXT:    mul t2, t0, t2
+; RV64IM-NEXT:    mul t0, t0, s10
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, a0, a1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s11, t3, s11
+; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, t3, a4
+; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, t3, a3
+; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, t3, a2
+; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, t3, a7
+; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t6, t3, t6
+; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, t3, s5
+; RV64IM-NEXT:    xor a0, s10, a0
+; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, s11, t3
+; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a5, a7, ra
+; RV64IM-NEXT:    xor a7, t6, s0
+; RV64IM-NEXT:    xor t6, s5, s6
+; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, t3
+; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s10, t3
+; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    xor a6, a7, s1
+; RV64IM-NEXT:    xor a7, t6, s7
+; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s0, t1
+; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t3
+; RV64IM-NEXT:    xor a5, a5, t4
+; RV64IM-NEXT:    xor a6, a6, s2
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, t5
+; RV64IM-NEXT:    xor a6, a6, s3
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a4
+; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a6, s4
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    xor a6, a7, t0
+; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a1, t0
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    or a0, a0, a7
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a2, a1, a7
+; RV64IM-NEXT:    xor a4, a1, a4
+; RV64IM-NEXT:    srli a1, a1, 8
+; RV64IM-NEXT:    slli a2, a2, 24
+; RV64IM-NEXT:    xor a5, a4, a5
+; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    srli a4, a4, 24
+; RV64IM-NEXT:    srliw a3, a5, 24
+; RV64IM-NEXT:    and a4, a4, a7
+; RV64IM-NEXT:    srli a7, a5, 40
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    slli a3, a3, 32
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    and a4, a7, t0
+; RV64IM-NEXT:    srli a5, a5, 56
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a4, a4, a5
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i32 @llvm.clmulr.i32(i32 %a, i32 %b)
+  ret i32 %res
+}
+
+define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
+; RV32IM-LABEL: clmulr_i64:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -512
+; RV32IM-NEXT:    sw ra, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t3, a0, 8
+; RV32IM-NEXT:    lui s8, 16
+; RV32IM-NEXT:    srli t4, a0, 24
+; RV32IM-NEXT:    slli s2, a0, 24
+; RV32IM-NEXT:    lui t5, 61681
+; RV32IM-NEXT:    lui t6, 209715
+; RV32IM-NEXT:    lui s0, 349525
+; RV32IM-NEXT:    srli s4, a1, 8
+; RV32IM-NEXT:    srli s1, a1, 24
+; RV32IM-NEXT:    slli s3, a1, 24
+; RV32IM-NEXT:    li s10, 1
+; RV32IM-NEXT:    lui a3, 1
+; RV32IM-NEXT:    lui a4, 2
+; RV32IM-NEXT:    lui a5, 4
+; RV32IM-NEXT:    lui a6, 8
+; RV32IM-NEXT:    lui a7, 32
+; RV32IM-NEXT:    lui t0, 64
+; RV32IM-NEXT:    lui t1, 128
+; RV32IM-NEXT:    lui t2, 256
+; RV32IM-NEXT:    lui a2, 512
+; RV32IM-NEXT:    addi s7, s8, -256
+; RV32IM-NEXT:    sw s7, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    addi s6, t5, -241
+; RV32IM-NEXT:    addi s5, t6, 819
+; RV32IM-NEXT:    addi t6, s0, 1365
+; RV32IM-NEXT:    slli s10, s10, 11
+; RV32IM-NEXT:    and t3, t3, s7
+; RV32IM-NEXT:    and a0, a0, s7
+; RV32IM-NEXT:    and t5, s4, s7
+; RV32IM-NEXT:    and a1, a1, s7
+; RV32IM-NEXT:    or t3, t3, t4
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or t4, t5, s1
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, s2, a0
+; RV32IM-NEXT:    or a1, s3, a1
+; RV32IM-NEXT:    or a0, a0, t3
+; RV32IM-NEXT:    or a1, a1, t4
+; RV32IM-NEXT:    srli t3, a0, 4
+; RV32IM-NEXT:    sw s6, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a0, a0, s6
+; RV32IM-NEXT:    srli t4, a1, 4
+; RV32IM-NEXT:    and a1, a1, s6
+; RV32IM-NEXT:    and t3, t3, s6
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    and t4, t4, s6
+; RV32IM-NEXT:    slli a1, a1, 4
+; RV32IM-NEXT:    or a0, t3, a0
+; RV32IM-NEXT:    or a1, t4, a1
+; RV32IM-NEXT:    srli t3, a0, 2
+; RV32IM-NEXT:    sw s5, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a0, a0, s5
+; RV32IM-NEXT:    srli t4, a1, 2
+; RV32IM-NEXT:    and a1, a1, s5
+; RV32IM-NEXT:    and t3, t3, s5
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    and t4, t4, s5
+; RV32IM-NEXT:    slli a1, a1, 2
+; RV32IM-NEXT:    or a0, t3, a0
+; RV32IM-NEXT:    or a1, t4, a1
+; RV32IM-NEXT:    srli t3, a0, 1
+; RV32IM-NEXT:    sw t6, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    srli t4, a1, 1
+; RV32IM-NEXT:    and a1, a1, t6
+; RV32IM-NEXT:    and t3, t3, t6
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    and t4, t4, t6
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    or s2, t3, a0
+; RV32IM-NEXT:    or a0, t4, a1
+; RV32IM-NEXT:    and a1, a0, s10
+; RV32IM-NEXT:    sw a1, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a3
+; RV32IM-NEXT:    sw a1, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a4
+; RV32IM-NEXT:    sw a1, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a5
+; RV32IM-NEXT:    sw a1, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a6
+; RV32IM-NEXT:    sw a1, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, s8
+; RV32IM-NEXT:    sw a1, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a7
+; RV32IM-NEXT:    sw a1, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and ra, a0, t0
+; RV32IM-NEXT:    and a1, a0, t1
+; RV32IM-NEXT:    sw a1, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, t2
+; RV32IM-NEXT:    sw a1, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a2
+; RV32IM-NEXT:    sw a1, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, s10
+; RV32IM-NEXT:    sw a1, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a3
+; RV32IM-NEXT:    sw a1, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a4
+; RV32IM-NEXT:    sw a1, 376(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a5
+; RV32IM-NEXT:    sw a1, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a6
+; RV32IM-NEXT:    sw a1, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, s8
+; RV32IM-NEXT:    sw a1, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a7
+; RV32IM-NEXT:    sw a1, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, t0
+; RV32IM-NEXT:    sw a1, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, t1
+; RV32IM-NEXT:    sw a1, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, t2
+; RV32IM-NEXT:    sw a1, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a2
+; RV32IM-NEXT:    sw a1, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 1024
+; RV32IM-NEXT:    and a2, a0, a1
+; RV32IM-NEXT:    sw a2, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 2048
+; RV32IM-NEXT:    and a2, a0, a1
+; RV32IM-NEXT:    sw a2, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 4096
+; RV32IM-NEXT:    and a2, a0, a1
+; RV32IM-NEXT:    sw a2, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 8192
+; RV32IM-NEXT:    and s1, a0, a1
+; RV32IM-NEXT:    sw s1, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 16384
+; RV32IM-NEXT:    and a2, a0, a1
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 32768
+; RV32IM-NEXT:    and a2, a0, a1
+; RV32IM-NEXT:    sw a2, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 65536
+; RV32IM-NEXT:    and t3, a0, a1
+; RV32IM-NEXT:    sw t3, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 131072
+; RV32IM-NEXT:    and a2, a0, a1
+; RV32IM-NEXT:    sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    and t2, a0, a1
+; RV32IM-NEXT:    sw t2, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a1, 524288
+; RV32IM-NEXT:    and t1, a0, a1
+; RV32IM-NEXT:    sw t1, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, s2, a1
+; RV32IM-NEXT:    sw a1, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t0, a0, 4
+; RV32IM-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t4, a0, 2
+; RV32IM-NEXT:    andi a7, a0, 1
+; RV32IM-NEXT:    sw a7, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi t5, a0, 8
+; RV32IM-NEXT:    andi a6, a0, 16
+; RV32IM-NEXT:    sw a6, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    sw a5, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a4, a0, 64
+; RV32IM-NEXT:    sw a4, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a3, a0, 128
+; RV32IM-NEXT:    sw a3, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a2, a0, 256
+; RV32IM-NEXT:    andi a1, a0, 512
+; RV32IM-NEXT:    andi s11, a0, 1024
+; RV32IM-NEXT:    andi s3, s2, 1
+; RV32IM-NEXT:    andi s5, s2, 2
+; RV32IM-NEXT:    andi s7, s2, 4
+; RV32IM-NEXT:    andi t6, s2, 8
+; RV32IM-NEXT:    andi s0, s2, 16
+; RV32IM-NEXT:    sw s0, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi s0, s2, 32
+; RV32IM-NEXT:    andi s4, s2, 64
+; RV32IM-NEXT:    andi s6, s2, 128
+; RV32IM-NEXT:    andi s8, s2, 256
+; RV32IM-NEXT:    andi s9, s2, 512
+; RV32IM-NEXT:    andi s10, s2, 1024
+; RV32IM-NEXT:    sw s10, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, t0
+; RV32IM-NEXT:    sw s10, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, t4
+; RV32IM-NEXT:    sw s10, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a7
+; RV32IM-NEXT:    sw s10, 332(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, t5
+; RV32IM-NEXT:    sw s10, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a6
+; RV32IM-NEXT:    sw s10, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a5
+; RV32IM-NEXT:    sw s10, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a4
+; RV32IM-NEXT:    sw s10, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a3
+; RV32IM-NEXT:    sw s10, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a2
+; RV32IM-NEXT:    mv t0, a2
+; RV32IM-NEXT:    sw s10, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, a1
+; RV32IM-NEXT:    mv a7, a1
+; RV32IM-NEXT:    sw s10, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, s2, s11
+; RV32IM-NEXT:    mv a6, s11
+; RV32IM-NEXT:    sw s10, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s10, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s10, s2, s10
+; RV32IM-NEXT:    sw s10, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s10, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s10, s2, s10
+; RV32IM-NEXT:    sw s10, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s10, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s10, s2, s10
+; RV32IM-NEXT:    sw s10, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s10, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, s2, s10
+; RV32IM-NEXT:    sw s11, 240(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s11, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, s2, s11
+; RV32IM-NEXT:    sw s11, 236(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s11, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, s2, s11
+; RV32IM-NEXT:    sw s11, 232(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s11, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, s2, s11
+; RV32IM-NEXT:    sw s11, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s11, s2, ra
+; RV32IM-NEXT:    sw s11, 224(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mv a5, ra
+; RV32IM-NEXT:    lw s11, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, s2, s11
+; RV32IM-NEXT:    sw ra, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, s2, ra
+; RV32IM-NEXT:    sw ra, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, s2, ra
+; RV32IM-NEXT:    sw ra, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, s2, ra
+; RV32IM-NEXT:    sw ra, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, s2, ra
+; RV32IM-NEXT:    sw ra, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, s2, ra
+; RV32IM-NEXT:    sw ra, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul ra, s2, s1
+; RV32IM-NEXT:    sw ra, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s1, s2, ra
+; RV32IM-NEXT:    sw s1, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s1, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s1, s2, s1
+; RV32IM-NEXT:    sw s1, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s1, s2, t3
+; RV32IM-NEXT:    sw s1, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s1, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, s2, s1
+; RV32IM-NEXT:    sw a4, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, s2, t2
+; RV32IM-NEXT:    sw a4, 176(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a3, s2, t1
+; RV32IM-NEXT:    sw a3, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s2, a0, s3
+; RV32IM-NEXT:    sw s2, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s2, a0, s5
+; RV32IM-NEXT:    sw s2, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s2, a0, s7
+; RV32IM-NEXT:    sw s2, 372(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a4, a0, t6
+; RV32IM-NEXT:    sw a4, 388(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a1
+; RV32IM-NEXT:    sw a4, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a2, a0, s0
+; RV32IM-NEXT:    sw a2, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a2, a0, s4
+; RV32IM-NEXT:    sw a2, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a2, a0, s6
+; RV32IM-NEXT:    sw a2, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a2, a0, s8
+; RV32IM-NEXT:    sw a2, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a2, a0, s9
+; RV32IM-NEXT:    sw a2, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a1
+; RV32IM-NEXT:    sw a2, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    sw a2, 316(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 376(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a2
+; RV32IM-NEXT:    sw a4, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    sw a2, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    sw a2, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    sw a2, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 164(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    sw a2, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a2, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a2
+; RV32IM-NEXT:    sw a2, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a1
+; RV32IM-NEXT:    sw a2, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a1
+; RV32IM-NEXT:    sw a2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a1
+; RV32IM-NEXT:    sw a2, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a1
+; RV32IM-NEXT:    sw a2, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a0, a1
+; RV32IM-NEXT:    sw a2, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a1
+; RV32IM-NEXT:    sw a4, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a1
+; RV32IM-NEXT:    sw a4, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a0, a1
+; RV32IM-NEXT:    sw a4, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu t6, a0, t4
+; RV32IM-NEXT:    mul a1, a0, t4
+; RV32IM-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu t2, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu t3, a0, t5
+; RV32IM-NEXT:    mul a1, a0, t5
+; RV32IM-NEXT:    sw a1, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu t4, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu s0, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu s2, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu s3, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a2, a0, t0
+; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, t0
+; RV32IM-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a2, a0, a7
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a7
+; RV32IM-NEXT:    sw a1, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a2, a0, a6
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a6
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a2, a0, a1
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a2, a0, a1
+; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a2, a0, a1
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a2, a0, s10
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, s10
+; RV32IM-NEXT:    sw a1, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a2, a0, a1
+; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a2, a0, a1
+; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a2, a0, a1
+; RV32IM-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a2, a0, a5
+; RV32IM-NEXT:    sw a2, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a0, a5
+; RV32IM-NEXT:    sw a1, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mv a1, s11
+; RV32IM-NEXT:    mulhu s11, a0, s11
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a5, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu s10, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu s8, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu s9, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a7, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a3, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a2, a0, ra
+; RV32IM-NEXT:    mul a1, a0, ra
+; RV32IM-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu t5, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu t0, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mulhu a6, a0, s1
+; RV32IM-NEXT:    mul a1, a0, s1
+; RV32IM-NEXT:    sw a1, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a1, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a4, a0, a1
+; RV32IM-NEXT:    mul a1, a0, a1
+; RV32IM-NEXT:    sw a1, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw t1, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mulhu a1, a0, t1
+; RV32IM-NEXT:    mul a0, a0, t1
+; RV32IM-NEXT:    sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t2, a0
+; RV32IM-NEXT:    sw a0, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s7, t6, a0
+; RV32IM-NEXT:    lw a0, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s5, t3, a0
+; RV32IM-NEXT:    lw a0, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s6, t4, a0
+; RV32IM-NEXT:    lw a0, 276(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s4, s0, a0
+; RV32IM-NEXT:    lw a0, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, s2, a0
+; RV32IM-NEXT:    sw a0, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s3, s3, a0
+; RV32IM-NEXT:    lw a0, 264(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s2, t1, a0
+; RV32IM-NEXT:    lw a0, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t1, a0
+; RV32IM-NEXT:    sw a0, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t1, a0
+; RV32IM-NEXT:    sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s0, s0, a0
+; RV32IM-NEXT:    lw a0, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or t6, t1, a0
+; RV32IM-NEXT:    lw a0, 244(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t1, a0
+; RV32IM-NEXT:    sw a0, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 240(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t1, a0
+; RV32IM-NEXT:    sw a0, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 236(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t1, a0
+; RV32IM-NEXT:    sw a0, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or t4, t1, a0
+; RV32IM-NEXT:    lw a0, 228(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or t3, t1, a0
+; RV32IM-NEXT:    lw a0, 224(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw t1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t1, a0
+; RV32IM-NEXT:    sw a0, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 220(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, s11, a0
+; RV32IM-NEXT:    sw a0, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, a5, a0
+; RV32IM-NEXT:    sw a0, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, s10, a0
+; RV32IM-NEXT:    sw a0, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 208(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s8, s8, a0
+; RV32IM-NEXT:    lw a0, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s10, s9, a0
+; RV32IM-NEXT:    lw a0, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s11, a7, a0
+; RV32IM-NEXT:    lw a0, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, a3, a0
+; RV32IM-NEXT:    sw a0, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 192(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    sw a0, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    sw a0, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or a0, t0, a0
+; RV32IM-NEXT:    sw a0, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 180(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or t2, a6, a0
+; RV32IM-NEXT:    lw a0, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or t1, a4, a0
+; RV32IM-NEXT:    lw s1, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    or s1, a1, s1
+; RV32IM-NEXT:    lw a0, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t0, a0, a1
+; RV32IM-NEXT:    lw a0, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 148(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t5, a0, a1
+; RV32IM-NEXT:    lw a0, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a6, a0, a1
+; RV32IM-NEXT:    lw a0, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s9, a0, a1
+; RV32IM-NEXT:    lw a0, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a7, a1, a0
+; RV32IM-NEXT:    lw a0, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a1, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw a1, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a2
+; RV32IM-NEXT:    lw a2, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a3, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a3
+; RV32IM-NEXT:    lw a3, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a4, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a4
+; RV32IM-NEXT:    lw a4, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    lw a5, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, ra
+; RV32IM-NEXT:    lw ra, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s7, ra, s7
+; RV32IM-NEXT:    lw ra, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s5, ra, s5
+; RV32IM-NEXT:    xor s4, s6, s4
+; RV32IM-NEXT:    xor s2, s3, s2
+; RV32IM-NEXT:    xor t6, s0, t6
+; RV32IM-NEXT:    xor t3, t4, t3
+; RV32IM-NEXT:    xor t4, s8, s10
+; RV32IM-NEXT:    xor t1, t2, t1
+; RV32IM-NEXT:    lw t2, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t0, t0, t2
+; RV32IM-NEXT:    lw t2, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t5, t2
+; RV32IM-NEXT:    lw t5, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a6, a6, t5
+; RV32IM-NEXT:    lw t5, 164(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t5, s9, t5
+; RV32IM-NEXT:    xor a0, a7, a0
+; RV32IM-NEXT:    lw a7, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a7
+; RV32IM-NEXT:    lw a7, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a7
+; RV32IM-NEXT:    lw a7, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a7
+; RV32IM-NEXT:    lw a7, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a7
+; RV32IM-NEXT:    lw a7, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a7
+; RV32IM-NEXT:    xor a7, s7, s5
+; RV32IM-NEXT:    lw s0, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s0, s4, s0
+; RV32IM-NEXT:    lw s3, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s2, s2, s3
+; RV32IM-NEXT:    lw s3, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t6, t6, s3
+; RV32IM-NEXT:    lw s3, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t3, t3, s3
+; RV32IM-NEXT:    xor t4, t4, s11
+; RV32IM-NEXT:    xor t1, t1, s1
+; RV32IM-NEXT:    lw s1, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t2, s1
+; RV32IM-NEXT:    lw s1, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a6, a6, s1
+; RV32IM-NEXT:    lw s1, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t5, t5, s1
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a2, a1
+; RV32IM-NEXT:    lw a2, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    lw a3, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a4, a3
+; RV32IM-NEXT:    lw a4, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a5, a4
+; RV32IM-NEXT:    xor a5, a7, s0
+; RV32IM-NEXT:    lw a7, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a7, s2, a7
+; RV32IM-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t6, t6, s0
+; RV32IM-NEXT:    lw s0, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t3, t3, s0
+; RV32IM-NEXT:    lw s0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t4, t4, s0
+; RV32IM-NEXT:    lw s0, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t1, s0
+; RV32IM-NEXT:    lw s0, 328(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t2, s0
+; RV32IM-NEXT:    lw s0, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a6, a6, s0
+; RV32IM-NEXT:    lw s0, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t5, t5, s0
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    lw s0, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, s0
+; RV32IM-NEXT:    lw s0, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, s0
+; RV32IM-NEXT:    lw s0, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, s0
+; RV32IM-NEXT:    lw s0, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, s0
+; RV32IM-NEXT:    xor a5, a5, a7
+; RV32IM-NEXT:    lw a7, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a7, t6, a7
+; RV32IM-NEXT:    lw t6, 276(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t3, t3, t6
+; RV32IM-NEXT:    lw t6, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t4, t4, t6
+; RV32IM-NEXT:    lw t6, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t1, t6
+; RV32IM-NEXT:    lw t6, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t2, t6
+; RV32IM-NEXT:    lw t6, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t5, t5, t6
+; RV32IM-NEXT:    lw t6, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, t6
+; RV32IM-NEXT:    lw t6, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, t6
+; RV32IM-NEXT:    xor a5, a5, a7
+; RV32IM-NEXT:    lw a7, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a7, t3, a7
+; RV32IM-NEXT:    lw t3, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t3, t4, t3
+; RV32IM-NEXT:    lw t4, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t1, t4
+; RV32IM-NEXT:    lw t4, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t2, t4
+; RV32IM-NEXT:    lw t4, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t4, t5, t4
+; RV32IM-NEXT:    xor a1, a0, a1
+; RV32IM-NEXT:    xor a1, a1, a2
+; RV32IM-NEXT:    lw a2, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a4, a5, a7
+; RV32IM-NEXT:    lw a5, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, t3, a5
+; RV32IM-NEXT:    lw a7, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    lw t1, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t2, t1
+; RV32IM-NEXT:    lw t2, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t2, t4, t2
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a2, a2, a3
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    lw a3, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a7, a3
+; RV32IM-NEXT:    lw a5, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, t1, a5
+; RV32IM-NEXT:    xor a3, a4, a3
+; RV32IM-NEXT:    lw a4, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a5, a4
+; RV32IM-NEXT:    xor a3, a3, t0
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    lw a5, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    lw a7, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a5, a1, a7
+; RV32IM-NEXT:    slli a5, a5, 8
+; RV32IM-NEXT:    or a0, a0, a5
+; RV32IM-NEXT:    xor a2, a1, a2
+; RV32IM-NEXT:    srli a1, a1, 8
+; RV32IM-NEXT:    and a1, a1, a7
+; RV32IM-NEXT:    srli a2, a2, 24
+; RV32IM-NEXT:    or a1, a1, a2
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    xor a4, a3, a4
+; RV32IM-NEXT:    xor a1, a4, a6
+; RV32IM-NEXT:    and a2, a1, a7
+; RV32IM-NEXT:    xor a4, a1, t2
+; RV32IM-NEXT:    srli a1, a1, 8
+; RV32IM-NEXT:    and a1, a1, a7
+; RV32IM-NEXT:    srli a5, a0, 4
+; RV32IM-NEXT:    lw a6, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a6
+; RV32IM-NEXT:    and a5, a5, a6
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a5, a0
+; RV32IM-NEXT:    slli a3, a3, 24
+; RV32IM-NEXT:    slli a2, a2, 8
+; RV32IM-NEXT:    or a2, a3, a2
+; RV32IM-NEXT:    srli a4, a4, 24
+; RV32IM-NEXT:    or a1, a1, a4
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    srli a2, a0, 2
+; RV32IM-NEXT:    lw a3, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a3
+; RV32IM-NEXT:    and a2, a2, a3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    srli a2, a1, 4
+; RV32IM-NEXT:    and a1, a1, a6
+; RV32IM-NEXT:    and a2, a2, a6
+; RV32IM-NEXT:    slli a1, a1, 4
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    srli a2, a1, 2
+; RV32IM-NEXT:    and a1, a1, a3
+; RV32IM-NEXT:    and a2, a2, a3
+; RV32IM-NEXT:    srli a3, a0, 1
+; RV32IM-NEXT:    lw a5, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a4, a0, a5
+; RV32IM-NEXT:    and a3, a3, a5
+; RV32IM-NEXT:    slli a1, a1, 2
+; RV32IM-NEXT:    or a1, a2, a1
+; RV32IM-NEXT:    srli a0, a1, 1
+; RV32IM-NEXT:    and a1, a1, a5
+; RV32IM-NEXT:    and a0, a0, a5
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    slli a1, a4, 1
+; RV32IM-NEXT:    or a1, a3, a1
+; RV32IM-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 512
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -448
+; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a2, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li a3, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s3, 16
+; RV64IM-NEXT:    srli s0, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli t0, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t4, 209715
+; RV64IM-NEXT:    lui t6, 349525
+; RV64IM-NEXT:    li a7, 1
+; RV64IM-NEXT:    lui s5, 2
+; RV64IM-NEXT:    lui t1, 4
+; RV64IM-NEXT:    lui a4, 128
+; RV64IM-NEXT:    lui s7, 256
+; RV64IM-NEXT:    lui s8, 4096
+; RV64IM-NEXT:    lui s10, 8192
+; RV64IM-NEXT:    lui a1, 4080
+; RV64IM-NEXT:    and a2, a2, a1
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    addi s1, s3, -256
+; RV64IM-NEXT:    and t5, a0, a1
+; RV64IM-NEXT:    slli a1, t2, 32
+; RV64IM-NEXT:    addi s9, t3, -241
+; RV64IM-NEXT:    addi t4, t4, 819
+; RV64IM-NEXT:    addi t2, t6, 1365
+; RV64IM-NEXT:    slli t3, a7, 11
+; RV64IM-NEXT:    slli s11, a7, 32
+; RV64IM-NEXT:    slli ra, a7, 33
+; RV64IM-NEXT:    slli t6, a7, 34
+; RV64IM-NEXT:    slli s2, a7, 35
+; RV64IM-NEXT:    slli s4, a7, 36
+; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a6, a3
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    slli a3, a7, 37
+; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a3, a5, s1
+; RV64IM-NEXT:    or a3, a3, s0
+; RV64IM-NEXT:    slli a5, a7, 38
+; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t5, t5, 24
+; RV64IM-NEXT:    and a0, a0, s1
+; RV64IM-NEXT:    or a1, t5, a1
+; RV64IM-NEXT:    slli a5, s9, 32
+; RV64IM-NEXT:    add a5, s9, a5
+; RV64IM-NEXT:    slli s0, t4, 32
+; RV64IM-NEXT:    add t4, t4, s0
+; RV64IM-NEXT:    slli s4, t2, 32
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    add t2, t2, s4
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a0, t0, a0
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t4
+; RV64IM-NEXT:    and a1, a1, t4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, t2
+; RV64IM-NEXT:    and a1, a1, t2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or t0, a1, a0
+; RV64IM-NEXT:    andi a0, t0, 2
+; RV64IM-NEXT:    andi a1, t0, 1
+; RV64IM-NEXT:    andi a2, t0, 4
+; RV64IM-NEXT:    andi a3, t0, 8
+; RV64IM-NEXT:    andi a5, t0, 16
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 32
+; RV64IM-NEXT:    mul a1, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a3
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, t0, 256
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a2, a0
+; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a0, t0, 512
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t4, a7, 39
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 40
+; RV64IM-NEXT:    and a1, t0, a4
+; RV64IM-NEXT:    and a2, t0, s7
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 41
+; RV64IM-NEXT:    and a2, t0, s8
+; RV64IM-NEXT:    and a3, t0, s10
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 48
+; RV64IM-NEXT:    and a3, t0, s11
+; RV64IM-NEXT:    and a4, t0, ra
+; RV64IM-NEXT:    mul a3, t0, a3
+; RV64IM-NEXT:    mul a4, t0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, a7, 49
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 56
+; RV64IM-NEXT:    and a1, t0, a2
+; RV64IM-NEXT:    and a2, t0, a3
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, a7, 57
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, a1
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    mul a1, t0, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, a7, 42
+; RV64IM-NEXT:    slli ra, a7, 43
+; RV64IM-NEXT:    slli a3, a7, 44
+; RV64IM-NEXT:    slli a4, a7, 45
+; RV64IM-NEXT:    slli t5, a7, 46
+; RV64IM-NEXT:    slli s0, a7, 47
+; RV64IM-NEXT:    slli s1, a7, 50
+; RV64IM-NEXT:    slli a0, a7, 51
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 52
+; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 53
+; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 54
+; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 55
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 58
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 59
+; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 60
+; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a7, 61
+; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, a7, 62
+; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, t3
+; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 1
+; RV64IM-NEXT:    and a0, t0, s7
+; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 8
+; RV64IM-NEXT:    and a0, t0, s8
+; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s3
+; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s6, 32
+; RV64IM-NEXT:    and a0, t0, s6
+; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 64
+; RV64IM-NEXT:    and a0, t0, s10
+; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 512
+; RV64IM-NEXT:    and a0, t0, s11
+; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s4, 1024
+; RV64IM-NEXT:    and a0, t0, s4
+; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s5, 2048
+; RV64IM-NEXT:    and a0, t0, s5
+; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 16384
+; RV64IM-NEXT:    and a0, t0, s9
+; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a5, 32768
+; RV64IM-NEXT:    and a5, t0, a5
+; RV64IM-NEXT:    lui a6, 65536
+; RV64IM-NEXT:    and a6, t0, a6
+; RV64IM-NEXT:    lui t1, 131072
+; RV64IM-NEXT:    and t1, t0, t1
+; RV64IM-NEXT:    lui t2, 262144
+; RV64IM-NEXT:    and t2, t0, t2
+; RV64IM-NEXT:    and a0, t0, t6
+; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, t0, s2
+; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, t0, a0
+; RV64IM-NEXT:    and a1, t0, t4
+; RV64IM-NEXT:    and a7, t0, a2
+; RV64IM-NEXT:    and ra, t0, ra
+; RV64IM-NEXT:    and t3, t0, a3
+; RV64IM-NEXT:    and t4, t0, a4
+; RV64IM-NEXT:    and t5, t0, t5
+; RV64IM-NEXT:    and t6, t0, s0
+; RV64IM-NEXT:    and s0, t0, s1
+; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, t0, a2
+; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, t0, a2
+; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, t0, a2
+; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, t0, a2
+; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, t0, a2
+; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, t0, a2
+; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, t0, a2
+; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, t0, a2
+; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, t0, a2
+; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, t0, a2
+; RV64IM-NEXT:    andi s11, t0, 64
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 128
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi s11, t0, 1024
+; RV64IM-NEXT:    mul a2, t0, s11
+; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, t0, a2
+; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a4, t0, a2
+; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, t0, a2
+; RV64IM-NEXT:    mul a2, t0, a5
+; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, a6
+; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t1
+; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a2, t0, t2
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srliw t2, t0, 31
+; RV64IM-NEXT:    slli t2, t2, 31
+; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, t0, a2
+; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, t0, a5
+; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, t0, a6
+; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, t0, a6
+; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a0
+; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a0, t0, a1
+; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, t0, a7
+; RV64IM-NEXT:    mul ra, t0, ra
+; RV64IM-NEXT:    mul a6, t0, t3
+; RV64IM-NEXT:    mul t4, t0, t4
+; RV64IM-NEXT:    mul t5, t0, t5
+; RV64IM-NEXT:    mul a0, t0, t6
+; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t6, t0, s0
+; RV64IM-NEXT:    mul s0, t0, s1
+; RV64IM-NEXT:    mul s1, t0, s2
+; RV64IM-NEXT:    mul s2, t0, s3
+; RV64IM-NEXT:    mul s3, t0, s4
+; RV64IM-NEXT:    mul s4, t0, s5
+; RV64IM-NEXT:    mul s5, t0, s6
+; RV64IM-NEXT:    mul s6, t0, s7
+; RV64IM-NEXT:    mul s7, t0, s8
+; RV64IM-NEXT:    mul s8, t0, s9
+; RV64IM-NEXT:    mul s9, t0, s10
+; RV64IM-NEXT:    srli s10, t0, 63
+; RV64IM-NEXT:    slli s10, s10, 63
+; RV64IM-NEXT:    mul t2, t0, t2
+; RV64IM-NEXT:    mul t0, t0, s10
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, a0, a1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s11, t3, s11
+; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, t3, a4
+; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, t3, a3
+; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, t3, a2
+; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, t3, a7
+; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t6, t3, t6
+; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, t3, s5
+; RV64IM-NEXT:    xor a0, s10, a0
+; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s10, s11, t3
+; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a5, a7, ra
+; RV64IM-NEXT:    xor a7, t6, s0
+; RV64IM-NEXT:    xor t6, s5, s6
+; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a0, t3
+; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, t3
+; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s10, t3
+; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a4, t3
+; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    xor a6, a7, s1
+; RV64IM-NEXT:    xor a7, t6, s7
+; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s0, t1
+; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t3
+; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t3
+; RV64IM-NEXT:    xor a5, a5, t4
+; RV64IM-NEXT:    xor a6, a6, s2
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    xor a1, a1, t1
+; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a5, a5, t5
+; RV64IM-NEXT:    xor a6, a6, s3
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor a1, a1, a4
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a4
+; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    xor a5, a6, s4
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    xor a6, a7, t0
+; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a1, t0
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    or a0, a0, a7
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a2, a1, a7
+; RV64IM-NEXT:    xor a4, a1, a4
+; RV64IM-NEXT:    srli a1, a1, 8
+; RV64IM-NEXT:    slli a2, a2, 24
+; RV64IM-NEXT:    xor a5, a4, a5
+; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    srli a4, a4, 24
+; RV64IM-NEXT:    srliw a3, a5, 24
+; RV64IM-NEXT:    and a4, a4, a7
+; RV64IM-NEXT:    srli a7, a5, 40
+; RV64IM-NEXT:    xor a5, a5, a6
+; RV64IM-NEXT:    slli a3, a3, 32
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    and a4, a7, t0
+; RV64IM-NEXT:    srli a5, a5, 56
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    or a4, a4, a5
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 448
+; RV64IM-NEXT:    ret
+  %res = call i64 @llvm.clmulr.i64(i64 %a, i64 %b)
+  ret i64 %res
+}
+
+define i4 @clmulr_constfold_i4() nounwind {
+; CHECK-LABEL: clmulr_constfold_i4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+  %res = call i4 @llvm.clmulr.i4(i4 1, i4 2)
+  ret i4 %res
+}
+
+define i16 @clmulr_constfold_i16() nounwind {
+; RV32IM-LABEL: clmulr_constfold_i16:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    lui a0, 699051
+; RV32IM-NEXT:    addi a0, a0, -1366
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_constfold_i16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV64IM-NEXT:    ld a0, %lo(.LCPI13_0)(a0)
+; RV64IM-NEXT:    ret
+  %res = call i16 @llvm.clmulr.i16(i16 -2, i16 -1)
+  ret i16 %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
new file mode 100644
index 0000000000000..dd04be1212587
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
@@ -0,0 +1,24188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <vscale x 1 x i32> @clmul_nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vand.vi v10, v9, 2
+; CHECK-NEXT:    vand.vi v11, v9, 1
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v11, v10
+; CHECK-NEXT:    vand.vi v11, v9, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vi v11, v9, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    vxor.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i32> @llvm.clmul.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y)
+  ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @clmul_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vand.vi v10, v9, 2
+; CHECK-NEXT:    vand.vi v11, v9, 1
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v11, v10
+; CHECK-NEXT:    vand.vi v11, v9, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vi v11, v9, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    vxor.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.clmul.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
+  ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @clmul_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vand.vi v12, v10, 2
+; CHECK-NEXT:    vand.vi v14, v10, 1
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v14, v12
+; CHECK-NEXT:    vand.vi v14, v10, 4
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vi v14, v10, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vand.vx v14, v10, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vxor.vv v12, v12, v14
+; CHECK-NEXT:    vmul.vv v8, v8, v10
+; CHECK-NEXT:    vxor.vv v8, v12, v8
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @clmul_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vand.vi v12, v8, 2
+; CHECK-NEXT:    vand.vi v16, v8, 1
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v16, v12
+; CHECK-NEXT:    vand.vi v16, v8, 4
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vi v16, v8, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vxor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vmul.vv v8, v8, v16
+; CHECK-NEXT:    vxor.vv v8, v12, v8
+; CHECK-NEXT:    ret
+  %a = call <vscale x 8 x i32> @llvm.clmul.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y)
+  ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 16 x i32> @clmul_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vand.vi v24, v16, 2
+; CHECK-NEXT:    vand.vi v0, v16, 1
+; CHECK-NEXT:    vmul.vv v24, v8, v24
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v0, v24
+; CHECK-NEXT:    vand.vi v0, v16, 4
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vi v0, v16, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vand.vx v0, v16, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vxor.vv v24, v24, v0
+; CHECK-NEXT:    vmul.vv v8, v8, v16
+; CHECK-NEXT:    vxor.vv v8, v24, v8
+; CHECK-NEXT:    ret
+  %a = call <vscale x 16 x i32> @llvm.clmul.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
+  ret <vscale x 16 x i32> %a
+}
+
+define <vscale x 1 x i64> @clmul_nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y) nounwind {
+; RV32-LABEL: clmul_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li t5, 1
+; RV32-NEXT:    li a4, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li s11, 8
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    li ra, 32
+; RV32-NEXT:    li s10, 64
+; RV32-NEXT:    li s9, 128
+; RV32-NEXT:    li s8, 256
+; RV32-NEXT:    li s7, 512
+; RV32-NEXT:    li s1, 1024
+; RV32-NEXT:    lui s6, 1
+; RV32-NEXT:    lui s5, 2
+; RV32-NEXT:    lui s4, 4
+; RV32-NEXT:    lui s3, 8
+; RV32-NEXT:    lui s2, 16
+; RV32-NEXT:    lui s0, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a3, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw t5, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a4, 260(sp)
+; RV32-NEXT:    lui a4, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s11, 244(sp)
+; RV32-NEXT:    vsetvli s11, zero, e64, m1, ta, ma
+; RV32-NEXT:    vand.vi v13, v9, 2
+; RV32-NEXT:    vand.vi v14, v9, 1
+; RV32-NEXT:    vand.vi v12, v9, 4
+; RV32-NEXT:    vand.vi v11, v9, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a0, 236(sp)
+; RV32-NEXT:    vand.vx v10, v9, a0
+; RV32-NEXT:    addi s11, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw ra, 228(sp)
+; RV32-NEXT:    vand.vx v15, v9, ra
+; RV32-NEXT:    addi ra, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s10, 220(sp)
+; RV32-NEXT:    vand.vx v16, v9, s10
+; RV32-NEXT:    addi s10, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s9, 212(sp)
+; RV32-NEXT:    vand.vx v17, v9, s9
+; RV32-NEXT:    addi s9, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s8, 204(sp)
+; RV32-NEXT:    vand.vx v18, v9, s8
+; RV32-NEXT:    addi s8, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s7, 196(sp)
+; RV32-NEXT:    vand.vx v19, v9, s7
+; RV32-NEXT:    addi s7, sp, 232
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw s1, 188(sp)
+; RV32-NEXT:    vand.vx v20, v9, s1
+; RV32-NEXT:    slli t5, t5, 11
+; RV32-NEXT:    vand.vx v21, v9, s6
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw t5, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw s6, 172(sp)
+; RV32-NEXT:    addi s6, sp, 216
+; RV32-NEXT:    vand.vx v22, v9, s5
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s5, 164(sp)
+; RV32-NEXT:    addi s5, sp, 208
+; RV32-NEXT:    vand.vx v23, v9, s4
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s4, 156(sp)
+; RV32-NEXT:    addi s4, sp, 200
+; RV32-NEXT:    vand.vx v24, v9, s3
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s3, 148(sp)
+; RV32-NEXT:    addi s3, sp, 192
+; RV32-NEXT:    vand.vx v25, v9, s2
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s2, 140(sp)
+; RV32-NEXT:    addi s2, sp, 184
+; RV32-NEXT:    vand.vx v26, v9, s0
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s0, 132(sp)
+; RV32-NEXT:    addi s1, sp, 176
+; RV32-NEXT:    vand.vx v27, v9, t6
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t6, 124(sp)
+; RV32-NEXT:    addi s0, sp, 168
+; RV32-NEXT:    vand.vx v28, v9, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t6, sp, 160
+; RV32-NEXT:    vand.vx v29, v9, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t4, sp, 152
+; RV32-NEXT:    vand.vx v30, v9, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t3, sp, 144
+; RV32-NEXT:    vand.vx v31, v9, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t2, sp, 136
+; RV32-NEXT:    vand.vx v7, v9, t0
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t1, sp, 128
+; RV32-NEXT:    vand.vx v6, v9, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi t0, sp, 120
+; RV32-NEXT:    vand.vx v5, v9, a6
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    vand.vx v4, v9, a5
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a6, sp, 104
+; RV32-NEXT:    vand.vx v3, v9, a3
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a3, 52(sp)
+; RV32-NEXT:    addi a5, sp, 96
+; RV32-NEXT:    vand.vx v2, v9, a4
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a4, 44(sp)
+; RV32-NEXT:    addi a4, sp, 88
+; RV32-NEXT:    vand.vx v1, v9, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a0, 262144
+; RV32-NEXT:    sw a0, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    addi a2, sp, 72
+; RV32-NEXT:    vand.vx v0, v9, t5
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    vmul.vv v13, v8, v13
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vi v14, v14, 0
+; RV32-NEXT:    vxor.vv v14, v14, v13
+; RV32-NEXT:    vlse64.v v13, (s11), zero
+; RV32-NEXT:    addi s11, sp, 56
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v14, v14, v12
+; RV32-NEXT:    vlse64.v v12, (ra), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv ra, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, ra
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v12, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi ra, sp, 48
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v14, v14, v11
+; RV32-NEXT:    vlse64.v v11, (s10), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli s10, t5, 2
+; RV32-NEXT:    add t5, s10, t5
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v11, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s10, sp, 40
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v14, v14, v10
+; RV32-NEXT:    vlse64.v v10, (s9), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v10, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi t5, sp, 32
+; RV32-NEXT:    vmul.vv v15, v8, v15
+; RV32-NEXT:    vxor.vv v15, v14, v15
+; RV32-NEXT:    vlse64.v v10, (s8), zero
+; RV32-NEXT:    csrr s8, vlenb
+; RV32-NEXT:    slli s9, s8, 1
+; RV32-NEXT:    add s8, s9, s8
+; RV32-NEXT:    add s8, sp, s8
+; RV32-NEXT:    addi s8, s8, 288
+; RV32-NEXT:    vs1r.v v10, (s8) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s8, sp, 24
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v15, v16
+; RV32-NEXT:    vlse64.v v10, (s7), zero
+; RV32-NEXT:    csrr s7, vlenb
+; RV32-NEXT:    slli s7, s7, 1
+; RV32-NEXT:    add s7, sp, s7
+; RV32-NEXT:    addi s7, s7, 288
+; RV32-NEXT:    vs1r.v v10, (s7) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 16
+; RV32-NEXT:    vmul.vv v17, v8, v17
+; RV32-NEXT:    vmul.vv v18, v8, v18
+; RV32-NEXT:    vmul.vv v19, v8, v19
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vmul.vv v21, v8, v21
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    vmul.vv v23, v8, v23
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v25, v8, v25
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v27, v8, v27
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v29, v8, v29
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vmul.vv v31, v8, v31
+; RV32-NEXT:    vmul.vv v7, v8, v7
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vmul.vv v5, v8, v5
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vmul.vv v3, v8, v3
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vmul.vv v1, v8, v1
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v16, v16, v17
+; RV32-NEXT:    addi s9, sp, 224
+; RV32-NEXT:    vlse64.v v11, (s9), zero
+; RV32-NEXT:    vxor.vv v16, v16, v18
+; RV32-NEXT:    vlse64.v v10, (s6), zero
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 288
+; RV32-NEXT:    vs1r.v v10, (s6) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vxor.vv v16, v16, v19
+; RV32-NEXT:    vlse64.v v10, (s5), zero
+; RV32-NEXT:    addi s5, sp, 288
+; RV32-NEXT:    vs1r.v v10, (s5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vxor.vv v16, v16, v20
+; RV32-NEXT:    vlse64.v v12, (s4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v0
+; RV32-NEXT:    vlse64.v v0, (s3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v21
+; RV32-NEXT:    vlse64.v v21, (s2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v22
+; RV32-NEXT:    vlse64.v v22, (s1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v23
+; RV32-NEXT:    vlse64.v v23, (s0), zero
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    vlse64.v v24, (t6), zero
+; RV32-NEXT:    vxor.vv v16, v16, v25
+; RV32-NEXT:    vlse64.v v25, (t4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v26
+; RV32-NEXT:    vlse64.v v26, (t3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v27
+; RV32-NEXT:    vlse64.v v27, (t2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v28
+; RV32-NEXT:    vlse64.v v28, (t1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v29
+; RV32-NEXT:    vlse64.v v29, (t0), zero
+; RV32-NEXT:    vxor.vv v16, v16, v30
+; RV32-NEXT:    vlse64.v v30, (a7), zero
+; RV32-NEXT:    vxor.vv v16, v16, v31
+; RV32-NEXT:    vlse64.v v31, (a6), zero
+; RV32-NEXT:    vxor.vv v16, v16, v7
+; RV32-NEXT:    vlse64.v v7, (a5), zero
+; RV32-NEXT:    vxor.vv v16, v16, v6
+; RV32-NEXT:    vlse64.v v6, (a4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v5
+; RV32-NEXT:    vlse64.v v5, (a3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v4
+; RV32-NEXT:    vlse64.v v4, (a2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v3
+; RV32-NEXT:    vlse64.v v3, (a1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v2
+; RV32-NEXT:    vlse64.v v2, (s11), zero
+; RV32-NEXT:    vxor.vv v1, v16, v1
+; RV32-NEXT:    vlse64.v v10, (ra), zero
+; RV32-NEXT:    vand.vv v13, v9, v13
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v14, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v14, v9, v14
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v15, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v15, v9, v15
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v16, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v9, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v17, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v17, v9, v17
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v18, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v18, v9, v18
+; RV32-NEXT:    vand.vv v19, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v20, v9, v11
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v11, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v11, v9, v12
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v0, v9, v0
+; RV32-NEXT:    vand.vv v21, v9, v21
+; RV32-NEXT:    vand.vv v22, v9, v22
+; RV32-NEXT:    vand.vv v23, v9, v23
+; RV32-NEXT:    vand.vv v24, v9, v24
+; RV32-NEXT:    vand.vv v25, v9, v25
+; RV32-NEXT:    vand.vv v26, v9, v26
+; RV32-NEXT:    vand.vv v27, v9, v27
+; RV32-NEXT:    vand.vv v28, v9, v28
+; RV32-NEXT:    vand.vv v29, v9, v29
+; RV32-NEXT:    vand.vv v30, v9, v30
+; RV32-NEXT:    vand.vv v31, v9, v31
+; RV32-NEXT:    vand.vv v7, v9, v7
+; RV32-NEXT:    vand.vv v6, v9, v6
+; RV32-NEXT:    vand.vv v5, v9, v5
+; RV32-NEXT:    vand.vv v4, v9, v4
+; RV32-NEXT:    vand.vv v11, v9, v3
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v2, v9, v2
+; RV32-NEXT:    vand.vv v10, v9, v10
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v10, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vlse64.v v10, (s10), zero
+; RV32-NEXT:    vlse64.v v3, (t5), zero
+; RV32-NEXT:    vlse64.v v11, (s8), zero
+; RV32-NEXT:    vlse64.v v12, (s7), zero
+; RV32-NEXT:    vand.vv v10, v9, v10
+; RV32-NEXT:    vand.vv v3, v9, v3
+; RV32-NEXT:    vand.vv v11, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v9, v12
+; RV32-NEXT:    vand.vx v9, v9, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    vxor.vv v9, v1, v9
+; RV32-NEXT:    vmul.vv v11, v8, v13
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v14
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v15
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v16
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v17
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v18
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v19
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v20
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v0
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v21
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v22
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v23
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v24
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v25
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v26
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v27
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v28
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v29
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v30
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v31
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v7
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v6
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v5
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v4
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v2
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    vmul.vv v10, v8, v3
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    vmul.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v9, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vand.vi v10, v9, 2
+; RV64-NEXT:    vand.vi v11, v9, 1
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v11, v10
+; RV64-NEXT:    vand.vi v11, v9, 4
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vi v11, v9, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vmul.vv v8, v8, v9
+; RV64-NEXT:    vxor.vv v8, v10, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.clmul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y)
+  ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @clmul_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) nounwind {
+; RV32-LABEL: clmul_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li s2, 1
+; RV32-NEXT:    li a3, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li s7, 8
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    li s6, 32
+; RV32-NEXT:    li s5, 64
+; RV32-NEXT:    li s4, 128
+; RV32-NEXT:    li s1, 256
+; RV32-NEXT:    li s0, 512
+; RV32-NEXT:    li t5, 1024
+; RV32-NEXT:    lui ra, 1
+; RV32-NEXT:    lui s8, 2
+; RV32-NEXT:    lui s10, 4
+; RV32-NEXT:    lui s11, 8
+; RV32-NEXT:    lui s9, 16
+; RV32-NEXT:    lui s3, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a4, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw s2, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a3, 260(sp)
+; RV32-NEXT:    lui a3, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s7, 244(sp)
+; RV32-NEXT:    vsetvli s7, zero, e64, m2, ta, ma
+; RV32-NEXT:    vand.vi v28, v10, 2
+; RV32-NEXT:    vand.vi v20, v10, 1
+; RV32-NEXT:    vand.vi v30, v10, 4
+; RV32-NEXT:    vand.vi v14, v10, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a0, 236(sp)
+; RV32-NEXT:    vand.vx v12, v10, a0
+; RV32-NEXT:    addi s7, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw s6, 228(sp)
+; RV32-NEXT:    vand.vx v16, v10, s6
+; RV32-NEXT:    addi s6, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s5, 220(sp)
+; RV32-NEXT:    vand.vx v18, v10, s5
+; RV32-NEXT:    addi s5, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s4, 212(sp)
+; RV32-NEXT:    vand.vx v0, v10, s4
+; RV32-NEXT:    addi s4, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s1, 204(sp)
+; RV32-NEXT:    vand.vx v6, v10, s1
+; RV32-NEXT:    addi s1, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s0, 196(sp)
+; RV32-NEXT:    vand.vx v4, v10, s0
+; RV32-NEXT:    addi s0, sp, 232
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw t5, 188(sp)
+; RV32-NEXT:    vand.vx v2, v10, t5
+; RV32-NEXT:    slli s2, s2, 11
+; RV32-NEXT:    vand.vx v24, v10, ra
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw s2, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw ra, 172(sp)
+; RV32-NEXT:    addi t5, sp, 216
+; RV32-NEXT:    vand.vx v26, v10, s8
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s8, 164(sp)
+; RV32-NEXT:    addi s8, sp, 208
+; RV32-NEXT:    vand.vx v22, v10, s10
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s10, 156(sp)
+; RV32-NEXT:    addi s10, sp, 200
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vi v20, v20, 0
+; RV32-NEXT:    vxor.vv v20, v20, v28
+; RV32-NEXT:    vand.vx v28, v10, s11
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s11, 148(sp)
+; RV32-NEXT:    addi s11, sp, 192
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vxor.vv v20, v20, v30
+; RV32-NEXT:    vand.vx v30, v10, s9
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s9, 140(sp)
+; RV32-NEXT:    addi s9, sp, 184
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vv v14, v20, v14
+; RV32-NEXT:    vand.vx v20, v10, s3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv ra, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, ra
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v20, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s3, 132(sp)
+; RV32-NEXT:    addi s3, sp, 176
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v12, v14, v12
+; RV32-NEXT:    vand.vx v14, v10, t6
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t6, 124(sp)
+; RV32-NEXT:    addi t6, sp, 168
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vand.vx v16, v10, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t4, sp, 160
+; RV32-NEXT:    vmul.vv v18, v8, v18
+; RV32-NEXT:    vxor.vv v18, v12, v18
+; RV32-NEXT:    vand.vx v12, v10, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t3, sp, 152
+; RV32-NEXT:    vmul.vv v20, v8, v0
+; RV32-NEXT:    vxor.vv v18, v18, v20
+; RV32-NEXT:    vand.vx v20, v10, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t2, sp, 144
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vxor.vv v18, v18, v6
+; RV32-NEXT:    vand.vx v6, v10, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t1, sp, 136
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v18, v18, v4
+; RV32-NEXT:    vand.vx v4, v10, t0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv ra, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add ra, ra, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, ra
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t0, sp, 128
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vxor.vv v18, v18, v2
+; RV32-NEXT:    vand.vx v2, v10, s2
+; RV32-NEXT:    addi ra, sp, 120
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vxor.vv v18, v18, v2
+; RV32-NEXT:    vand.vx v2, v10, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v18, v18, v24
+; RV32-NEXT:    vand.vx v4, v10, a6
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a6, sp, 104
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vxor.vv v18, v18, v26
+; RV32-NEXT:    vand.vx v26, v10, a5
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a5, sp, 96
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    vxor.vv v18, v18, v22
+; RV32-NEXT:    vand.vx v24, v10, a4
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a4, 52(sp)
+; RV32-NEXT:    addi a4, sp, 88
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v18, v18, v28
+; RV32-NEXT:    vand.vx v28, v10, a3
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a3, 44(sp)
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vxor.vv v18, v18, v30
+; RV32-NEXT:    vand.vx v30, v10, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a2, sp, 72
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a0, 262144
+; RV32-NEXT:    sw a0, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    sw a6, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv s2, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, s2
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vl2r.v v22, (a6) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v22
+; RV32-NEXT:    vxor.vv v0, v18, v0
+; RV32-NEXT:    vlse64.v v18, (s7), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv s2, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, s2
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v18, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 56
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vv v14, v0, v14
+; RV32-NEXT:    vlse64.v v18, (s6), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv s2, a6
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    add a6, a6, s2
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v18, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s2, sp, 48
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v14, v14, v16
+; RV32-NEXT:    vlse64.v v16, (s5), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    mv s5, a6
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    add a6, a6, s5
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v16, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s5, sp, 40
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v12, v14, v12
+; RV32-NEXT:    vlse64.v v14, (s4), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 5
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v14, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s4, sp, 32
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v12, v20
+; RV32-NEXT:    vlse64.v v12, (s1), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    mv s1, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add s1, s1, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add s1, s1, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, s1
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v12, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s1, sp, 24
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vxor.vv v20, v20, v6
+; RV32-NEXT:    vlse64.v v12, (s0), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv s0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add s0, s0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, s0
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v12, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s0, sp, 16
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    mv a6, s6
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    add a6, a6, s6
+; RV32-NEXT:    slli s6, s6, 3
+; RV32-NEXT:    add s6, s6, a6
+; RV32-NEXT:    lw a6, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 288
+; RV32-NEXT:    vl2r.v v12, (s6) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v12
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vxor.vv v20, v20, v6
+; RV32-NEXT:    addi s6, sp, 224
+; RV32-NEXT:    vlse64.v v0, (s6), zero
+; RV32-NEXT:    vxor.vv v20, v20, v2
+; RV32-NEXT:    vlse64.v v6, (t5), zero
+; RV32-NEXT:    vxor.vv v20, v20, v4
+; RV32-NEXT:    vlse64.v v22, (s8), zero
+; RV32-NEXT:    vxor.vv v20, v20, v26
+; RV32-NEXT:    vlse64.v v18, (s10), zero
+; RV32-NEXT:    vxor.vv v20, v20, v24
+; RV32-NEXT:    vlse64.v v16, (s11), zero
+; RV32-NEXT:    vxor.vv v20, v20, v28
+; RV32-NEXT:    vlse64.v v14, (s9), zero
+; RV32-NEXT:    vxor.vv v2, v20, v30
+; RV32-NEXT:    vlse64.v v12, (s3), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v26, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v4, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 4
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v30, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 5
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v20, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v24, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v28, v10, v24
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v24, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v10, v24
+; RV32-NEXT:    vand.vv v0, v10, v0
+; RV32-NEXT:    vand.vv v6, v10, v6
+; RV32-NEXT:    vand.vv v22, v10, v22
+; RV32-NEXT:    vand.vv v18, v10, v18
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v10, v16
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v16, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v14, v10, v14
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v14, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v12, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (t6), zero
+; RV32-NEXT:    vlse64.v v14, (t4), zero
+; RV32-NEXT:    vlse64.v v16, (t3), zero
+; RV32-NEXT:    vlse64.v v18, (t2), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t3, t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t3, t3, t2
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (t1), zero
+; RV32-NEXT:    vlse64.v v14, (t0), zero
+; RV32-NEXT:    vlse64.v v16, (ra), zero
+; RV32-NEXT:    vlse64.v v18, (a7), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 4
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 3
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (a6), zero
+; RV32-NEXT:    vlse64.v v14, (a5), zero
+; RV32-NEXT:    vlse64.v v16, (a4), zero
+; RV32-NEXT:    vlse64.v v18, (a3), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a4, a4, a3
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (a2), zero
+; RV32-NEXT:    vlse64.v v14, (a1), zero
+; RV32-NEXT:    vlse64.v v16, (s7), zero
+; RV32-NEXT:    vlse64.v v18, (s2), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v14, (s5), zero
+; RV32-NEXT:    vlse64.v v16, (s4), zero
+; RV32-NEXT:    vlse64.v v18, (s1), zero
+; RV32-NEXT:    vlse64.v v12, (s0), zero
+; RV32-NEXT:    vand.vv v14, v10, v14
+; RV32-NEXT:    vand.vv v16, v10, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v16, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v18, v10, v18
+; RV32-NEXT:    vand.vv v16, v10, v12
+; RV32-NEXT:    vand.vx v10, v10, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v10, v2, v10
+; RV32-NEXT:    vmul.vv v12, v8, v26
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v4
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v30
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v20
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v28
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v24
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v0
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v6
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v22
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v14
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v18
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v10, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vand.vi v12, v10, 2
+; RV64-NEXT:    vand.vi v14, v10, 1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v14, v12
+; RV64-NEXT:    vand.vi v14, v10, 4
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vi v14, v10, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vmul.vv v8, v8, v10
+; RV64-NEXT:    vxor.vv v8, v12, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @clmul_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind {
+; RV32-LABEL: clmul_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li s4, 1
+; RV32-NEXT:    li a3, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li a0, 8
+; RV32-NEXT:    li s3, 16
+; RV32-NEXT:    li s2, 32
+; RV32-NEXT:    li s5, 64
+; RV32-NEXT:    li s6, 128
+; RV32-NEXT:    li s8, 256
+; RV32-NEXT:    li s1, 512
+; RV32-NEXT:    li s7, 1024
+; RV32-NEXT:    lui ra, 1
+; RV32-NEXT:    lui s11, 2
+; RV32-NEXT:    lui s10, 4
+; RV32-NEXT:    lui s9, 8
+; RV32-NEXT:    lui s0, 16
+; RV32-NEXT:    lui t6, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a4, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw s4, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a3, 260(sp)
+; RV32-NEXT:    lui a3, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw a0, 244(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vand.vi v28, v12, 2
+; RV32-NEXT:    vand.vi v4, v12, 1
+; RV32-NEXT:    vand.vi v24, v12, 4
+; RV32-NEXT:    vand.vi v20, v12, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s3, 236(sp)
+; RV32-NEXT:    vand.vx v16, v12, s3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi s3, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw s2, 228(sp)
+; RV32-NEXT:    vand.vx v0, v12, s2
+; RV32-NEXT:    addi s2, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s5, 220(sp)
+; RV32-NEXT:    vmul.vv v16, v8, v28
+; RV32-NEXT:    vmul.vv v28, v8, v4
+; RV32-NEXT:    vxor.vi v28, v28, 0
+; RV32-NEXT:    vxor.vv v28, v28, v16
+; RV32-NEXT:    vand.vx v16, v12, s5
+; RV32-NEXT:    addi s5, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s6, 212(sp)
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v28, v28, v24
+; RV32-NEXT:    vand.vx v24, v12, s6
+; RV32-NEXT:    addi s6, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s8, 204(sp)
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v28, v20
+; RV32-NEXT:    vand.vx v28, v12, s8
+; RV32-NEXT:    addi s8, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s1, 196(sp)
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v20, v20, v4
+; RV32-NEXT:    vand.vx v4, v12, s1
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw s7, 188(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v20, v20, v0
+; RV32-NEXT:    vand.vx v0, v12, s7
+; RV32-NEXT:    slli a0, s4, 11
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v20, v20, v16
+; RV32-NEXT:    vand.vx v16, v12, ra
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a0, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw ra, 172(sp)
+; RV32-NEXT:    addi s4, sp, 216
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v24, v20, v24
+; RV32-NEXT:    vand.vx v20, v12, s11
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s11, 164(sp)
+; RV32-NEXT:    addi s11, sp, 208
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v28, v24, v28
+; RV32-NEXT:    vand.vx v24, v12, s10
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s10, 156(sp)
+; RV32-NEXT:    addi s10, sp, 200
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v4, v28, v4
+; RV32-NEXT:    vand.vx v28, v12, s9
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s9, 148(sp)
+; RV32-NEXT:    addi s9, sp, 192
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v4, v4, v0
+; RV32-NEXT:    vand.vx v0, v12, a0
+; RV32-NEXT:    addi ra, sp, 184
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v0, v4, v0
+; RV32-NEXT:    vand.vx v4, v12, s0
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s0, 140(sp)
+; RV32-NEXT:    addi s1, sp, 176
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v0, v0, v16
+; RV32-NEXT:    vand.vx v16, v12, t6
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t6, 132(sp)
+; RV32-NEXT:    addi s0, sp, 168
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v0, v0, v20
+; RV32-NEXT:    vand.vx v20, v12, t5
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t5, 124(sp)
+; RV32-NEXT:    addi t6, sp, 160
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    vand.vx v24, v12, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t5, sp, 152
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v0, v0, v28
+; RV32-NEXT:    vand.vx v28, v12, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t4, sp, 144
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    vand.vx v4, v12, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t3, sp, 136
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v0, v16
+; RV32-NEXT:    vand.vx v0, v12, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t2, sp, 128
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vand.vx v16, v12, t0
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t1, sp, 120
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v24, v20, v24
+; RV32-NEXT:    vand.vx v20, v12, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi t0, sp, 112
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v24, v24, v28
+; RV32-NEXT:    vand.vx v28, v12, a6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a7, sp, 104
+; RV32-NEXT:    vmul.vv v28, v8, v4
+; RV32-NEXT:    vxor.vv v24, v24, v28
+; RV32-NEXT:    vand.vx v28, v12, a5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a6, sp, 96
+; RV32-NEXT:    vmul.vv v28, v8, v0
+; RV32-NEXT:    vxor.vv v28, v24, v28
+; RV32-NEXT:    vand.vx v24, v12, a4
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a4, 52(sp)
+; RV32-NEXT:    addi a5, sp, 88
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v28, v16
+; RV32-NEXT:    vand.vx v28, v12, a3
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a3, 44(sp)
+; RV32-NEXT:    addi a4, sp, 80
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v16, v16, v20
+; RV32-NEXT:    vand.vx v4, v12, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a1, 262144
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    sw a0, 20(sp)
+; RV32-NEXT:    addi a2, sp, 64
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vlse64.v v16, (s3), zero
+; RV32-NEXT:    addi s3, sp, 56
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v0, v20, v0
+; RV32-NEXT:    vlse64.v v20, (s2), zero
+; RV32-NEXT:    addi s2, sp, 48
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    vlse64.v v24, (s5), zero
+; RV32-NEXT:    addi s5, sp, 40
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v0, v0, v28
+; RV32-NEXT:    vlse64.v v28, (s6), zero
+; RV32-NEXT:    addi s6, sp, 32
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v4, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v4, (s8), zero
+; RV32-NEXT:    addi s8, sp, 24
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    addi s7, sp, 232
+; RV32-NEXT:    vlse64.v v16, (s7), zero
+; RV32-NEXT:    addi s7, sp, 224
+; RV32-NEXT:    vlse64.v v20, (s7), zero
+; RV32-NEXT:    vlse64.v v24, (s4), zero
+; RV32-NEXT:    vlse64.v v28, (s11), zero
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 4
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    mv s7, s4
+; RV32-NEXT:    slli s4, s4, 1
+; RV32-NEXT:    add s7, s7, s4
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    add s4, s4, s7
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    mv s7, s4
+; RV32-NEXT:    slli s4, s4, 4
+; RV32-NEXT:    add s4, s4, s7
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    mv s7, s4
+; RV32-NEXT:    slli s4, s4, 1
+; RV32-NEXT:    add s7, s7, s4
+; RV32-NEXT:    slli s4, s4, 1
+; RV32-NEXT:    add s7, s7, s4
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    add s4, s4, s7
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v20, (s10), zero
+; RV32-NEXT:    vlse64.v v24, (s9), zero
+; RV32-NEXT:    vlse64.v v28, (ra), zero
+; RV32-NEXT:    vlse64.v v4, (s1), zero
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 2
+; RV32-NEXT:    mv s4, s1
+; RV32-NEXT:    slli s1, s1, 1
+; RV32-NEXT:    add s1, s1, s4
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 3
+; RV32-NEXT:    mv s4, s1
+; RV32-NEXT:    slli s1, s1, 2
+; RV32-NEXT:    add s1, s1, s4
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 6
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v4
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 3
+; RV32-NEXT:    mv s4, s1
+; RV32-NEXT:    slli s1, s1, 1
+; RV32-NEXT:    add s4, s4, s1
+; RV32-NEXT:    slli s1, s1, 2
+; RV32-NEXT:    add s1, s1, s4
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s0), zero
+; RV32-NEXT:    vlse64.v v28, (t6), zero
+; RV32-NEXT:    vlse64.v v4, (t5), zero
+; RV32-NEXT:    vlse64.v v0, (t4), zero
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 3
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 3
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v4
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t5, t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t5, t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v0
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    add t5, t5, t4
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v28, (t3), zero
+; RV32-NEXT:    vlse64.v v4, (t2), zero
+; RV32-NEXT:    vlse64.v v0, (t1), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    vand.vv v20, v12, v28
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 2
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v20, v12, v4
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 5
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v20, v12, v0
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 3
+; RV32-NEXT:    mv t1, t0
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    add t1, t1, t0
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    add t0, t0, t1
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 4
+; RV32-NEXT:    mv t1, t0
+; RV32-NEXT:    slli t0, t0, 2
+; RV32-NEXT:    add t0, t0, t1
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v16, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v16, (a7), zero
+; RV32-NEXT:    vlse64.v v0, (a6), zero
+; RV32-NEXT:    vlse64.v v20, (a5), zero
+; RV32-NEXT:    vlse64.v v24, (a4), zero
+; RV32-NEXT:    vand.vv v4, v12, v16
+; RV32-NEXT:    vand.vv v16, v12, v0
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (s3), zero
+; RV32-NEXT:    vlse64.v v28, (s2), zero
+; RV32-NEXT:    vand.vv v0, v12, v16
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 288
+; RV32-NEXT:    vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 288
+; RV32-NEXT:    vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 288
+; RV32-NEXT:    vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v16, (s5), zero
+; RV32-NEXT:    vlse64.v v20, (s6), zero
+; RV32-NEXT:    vlse64.v v24, (s8), zero
+; RV32-NEXT:    vlse64.v v28, (a0), zero
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v12, v24
+; RV32-NEXT:    vand.vv v20, v12, v28
+; RV32-NEXT:    vand.vx v12, v12, a1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v12, v16, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v16, v8, v4
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v16, v8, v0
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v16, v8, v24
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v12, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vand.vi v16, v12, 2
+; RV64-NEXT:    vand.vi v20, v12, 1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v20, v16
+; RV64-NEXT:    vand.vi v20, v12, 4
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vi v20, v12, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vmul.vv v8, v8, v12
+; RV64-NEXT:    vxor.vv v8, v16, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x i64> @llvm.clmul.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
+  ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @clmul_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind {
+; RV32-LABEL: clmul_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li s5, 1
+; RV32-NEXT:    li a3, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li s10, 8
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    li t6, 32
+; RV32-NEXT:    li s1, 64
+; RV32-NEXT:    li s3, 128
+; RV32-NEXT:    li s7, 256
+; RV32-NEXT:    li s4, 512
+; RV32-NEXT:    li s8, 1024
+; RV32-NEXT:    lui ra, 1
+; RV32-NEXT:    lui s11, 2
+; RV32-NEXT:    lui s9, 4
+; RV32-NEXT:    lui s6, 8
+; RV32-NEXT:    lui s2, 16
+; RV32-NEXT:    lui s0, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a4, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw s5, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a3, 260(sp)
+; RV32-NEXT:    lui a3, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s10, 244(sp)
+; RV32-NEXT:    vsetvli s10, zero, e64, m8, ta, ma
+; RV32-NEXT:    vand.vi v24, v16, 2
+; RV32-NEXT:    vand.vi v0, v16, 1
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vi v0, v0, 0
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    vand.vi v0, v16, 4
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vi v0, v16, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a0, 236(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a0
+; RV32-NEXT:    addi s10, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw t6, 228(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t6
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s1, 220(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s1
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s3, 212(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s3
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s7, 204(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s7
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s4, 196(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s4
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw s8, 188(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s8
+; RV32-NEXT:    slli s5, s5, 11
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw s5, 180(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s5
+; RV32-NEXT:    addi s5, sp, 216
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, ra
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw ra, 172(sp)
+; RV32-NEXT:    addi ra, sp, 208
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s11
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s11, 164(sp)
+; RV32-NEXT:    addi s11, sp, 200
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s9
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s9, 156(sp)
+; RV32-NEXT:    addi s9, sp, 192
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s6
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s6, 148(sp)
+; RV32-NEXT:    addi s6, sp, 184
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s2
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s2, 140(sp)
+; RV32-NEXT:    addi s3, sp, 176
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, s0
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s0, 132(sp)
+; RV32-NEXT:    addi s4, sp, 168
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t5
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t5, 124(sp)
+; RV32-NEXT:    addi s2, sp, 160
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi s1, sp, 152
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t6, sp, 144
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi s0, sp, 136
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t5, sp, 128
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, t0
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t4, sp, 120
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi t2, sp, 112
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a6
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi t3, sp, 104
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a5
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi t1, sp, 96
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a4
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a4, 52(sp)
+; RV32-NEXT:    addi t0, sp, 88
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a3
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a3, 44(sp)
+; RV32-NEXT:    addi a7, sp, 80
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vand.vx v0, v16, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a0, 262144
+; RV32-NEXT:    sw a0, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    sw t2, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s10), zero
+; RV32-NEXT:    addi a6, sp, 72
+; RV32-NEXT:    addi a5, sp, 64
+; RV32-NEXT:    addi a4, sp, 56
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 8
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a1, sp, 48
+; RV32-NEXT:    addi s10, sp, 40
+; RV32-NEXT:    addi a3, sp, 32
+; RV32-NEXT:    addi a2, sp, 24
+; RV32-NEXT:    addi s7, sp, 264
+; RV32-NEXT:    vlse64.v v24, (s7), zero
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 256
+; RV32-NEXT:    vlse64.v v0, (s7), zero
+; RV32-NEXT:    addi s7, sp, 248
+; RV32-NEXT:    vlse64.v v24, (s7), zero
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 240
+; RV32-NEXT:    vlse64.v v24, (s7), zero
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 5
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add s7, s7, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, s7
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 16
+; RV32-NEXT:    addi s8, sp, 232
+; RV32-NEXT:    vlse64.v v24, (s8), zero
+; RV32-NEXT:    csrr s8, vlenb
+; RV32-NEXT:    slli s8, s8, 4
+; RV32-NEXT:    mv t2, s8
+; RV32-NEXT:    slli s8, s8, 2
+; RV32-NEXT:    add t2, t2, s8
+; RV32-NEXT:    slli s8, s8, 1
+; RV32-NEXT:    add s8, s8, t2
+; RV32-NEXT:    lw t2, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s8, sp, s8
+; RV32-NEXT:    addi s8, s8, 288
+; RV32-NEXT:    vs8r.v v24, (s8) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi s8, sp, 224
+; RV32-NEXT:    vlse64.v v0, (s8), zero
+; RV32-NEXT:    vlse64.v v24, (s5), zero
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (ra), zero
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 4
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vl8r.v v24, (s5) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 4
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vl8r.v v24, (s5) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vl8r.v v24, (s5) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v0, (s11), zero
+; RV32-NEXT:    vlse64.v v24, (s9), zero
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    mv s8, s5
+; RV32-NEXT:    slli s5, s5, 3
+; RV32-NEXT:    add s8, s8, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s8
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s6), zero
+; RV32-NEXT:    csrr s5, vlenb
+; RV32-NEXT:    slli s5, s5, 4
+; RV32-NEXT:    mv s6, s5
+; RV32-NEXT:    slli s5, s5, 2
+; RV32-NEXT:    add s6, s6, s5
+; RV32-NEXT:    slli s5, s5, 1
+; RV32-NEXT:    add s5, s5, s6
+; RV32-NEXT:    add s5, sp, s5
+; RV32-NEXT:    addi s5, s5, 288
+; RV32-NEXT:    vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s3), zero
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 6
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v0, v16, v0
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 4
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vs8r.v v0, (s3) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 3
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 3
+; RV32-NEXT:    add s5, s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vl8r.v v24, (s3) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 3
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 2
+; RV32-NEXT:    add s5, s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 4
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 2
+; RV32-NEXT:    add s5, s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vl8r.v v24, (s3) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 5
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 2
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 6
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vl8r.v v24, (s3) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 4
+; RV32-NEXT:    mv s5, s3
+; RV32-NEXT:    slli s3, s3, 2
+; RV32-NEXT:    add s5, s5, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s5
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s4), zero
+; RV32-NEXT:    csrr s3, vlenb
+; RV32-NEXT:    slli s3, s3, 6
+; RV32-NEXT:    mv s4, s3
+; RV32-NEXT:    slli s3, s3, 1
+; RV32-NEXT:    add s3, s3, s4
+; RV32-NEXT:    add s3, sp, s3
+; RV32-NEXT:    addi s3, s3, 288
+; RV32-NEXT:    vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v0, (s2), zero
+; RV32-NEXT:    vlse64.v v24, (s1), zero
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 3
+; RV32-NEXT:    mv s2, s1
+; RV32-NEXT:    slli s1, s1, 3
+; RV32-NEXT:    add s2, s2, s1
+; RV32-NEXT:    slli s1, s1, 1
+; RV32-NEXT:    add s1, s1, s2
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs8r.v v24, (s1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (t6), zero
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 2
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 6
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vl8r.v v24, (t6) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 2
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 5
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vl8r.v v24, (t6) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 2
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vl8r.v v24, (t6) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s1, t6
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    add s1, s1, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add t6, t6, s1
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s0), zero
+; RV32-NEXT:    csrr t6, vlenb
+; RV32-NEXT:    slli t6, t6, 3
+; RV32-NEXT:    mv s0, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s0, s0, t6
+; RV32-NEXT:    slli t6, t6, 1
+; RV32-NEXT:    add s0, s0, t6
+; RV32-NEXT:    slli t6, t6, 2
+; RV32-NEXT:    add t6, t6, s0
+; RV32-NEXT:    add t6, sp, t6
+; RV32-NEXT:    addi t6, t6, 288
+; RV32-NEXT:    vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v0, (t5), zero
+; RV32-NEXT:    vlse64.v v24, (t4), zero
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 6
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs8r.v v24, (t4) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (t2), zero
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t4, t4, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t4, t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t4, t4, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 5
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t4, t4, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 6
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t4, t4, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 6
+; RV32-NEXT:    mv t4, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, t4
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (t3), zero
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 4
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t3, t3, t2
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v0, (t1), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 3
+; RV32-NEXT:    mv t1, t0
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    add t1, t1, t0
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    add t1, t1, t0
+; RV32-NEXT:    slli t0, t0, 2
+; RV32-NEXT:    add t0, t0, t1
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs8r.v v24, (t0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 7
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 4
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 3
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 4
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 3
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 3
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 4
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 7
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 3
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a6), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v0, (a5), zero
+; RV32-NEXT:    vlse64.v v24, (a4), zero
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a4, a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a4, a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 7
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, a1, a4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a4, a4, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, a1, a4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 7
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a4, a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a4, a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a4, a4, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, a1, a4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s10), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 6
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v0, (a3), zero
+; RV32-NEXT:    vlse64.v v24, (a2), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s7), zero
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 6
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 6
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v0, v16, v24
+; RV32-NEXT:    vand.vx v16, v16, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v16, v24, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    vmul.vv v8, v8, v0
+; RV32-NEXT:    vxor.vv v8, v16, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vand.vi v24, v16, 2
+; RV64-NEXT:    vand.vi v0, v16, 1
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    vand.vi v0, v16, 4
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vi v0, v16, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v0, v16, a0
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vxor.vv v8, v24, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 8 x i64> @llvm.clmul.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
+  ret <vscale x 8 x i64> %a
+}
+
+define <vscale x 1 x i32> @clmulr_nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y) nounwind {
+; CHECK-LABEL: clmulr_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a4, 16
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vsll.vi v11, v8, 24
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    lui a5, 349525
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    addi a3, a4, -256
+; CHECK-NEXT:    addi a2, a0, -241
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    addi a0, a5, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v11, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vand.vx v9, v8, a6
+; CHECK-NEXT:    li a5, 32
+; CHECK-NEXT:    vand.vx v10, v8, a5
+; CHECK-NEXT:    li a5, 64
+; CHECK-NEXT:    vand.vx v11, v8, a5
+; CHECK-NEXT:    li a5, 128
+; CHECK-NEXT:    vand.vx v12, v8, a5
+; CHECK-NEXT:    li a5, 256
+; CHECK-NEXT:    vand.vx v13, v8, a5
+; CHECK-NEXT:    li a5, 512
+; CHECK-NEXT:    vand.vx v14, v8, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:    vand.vx v15, v8, a5
+; CHECK-NEXT:    li a5, 1
+; CHECK-NEXT:    slli a5, a5, 11
+; CHECK-NEXT:    vand.vx v16, v8, a5
+; CHECK-NEXT:    lui a5, 1
+; CHECK-NEXT:    vand.vx v17, v8, a5
+; CHECK-NEXT:    lui a5, 2
+; CHECK-NEXT:    vand.vx v18, v8, a5
+; CHECK-NEXT:    lui a5, 4
+; CHECK-NEXT:    vand.vx v19, v8, a5
+; CHECK-NEXT:    lui a5, 8
+; CHECK-NEXT:    vand.vx v20, v8, a5
+; CHECK-NEXT:    lui a5, 32
+; CHECK-NEXT:    vand.vx v21, v8, a4
+; CHECK-NEXT:    lui a4, 64
+; CHECK-NEXT:    vand.vx v22, v8, a5
+; CHECK-NEXT:    lui a5, 128
+; CHECK-NEXT:    vand.vx v23, v8, a4
+; CHECK-NEXT:    lui a4, 256
+; CHECK-NEXT:    vand.vx v24, v8, a5
+; CHECK-NEXT:    lui a5, 512
+; CHECK-NEXT:    vand.vx v25, v8, a4
+; CHECK-NEXT:    lui a4, 1024
+; CHECK-NEXT:    vand.vx v26, v8, a5
+; CHECK-NEXT:    lui a5, 2048
+; CHECK-NEXT:    vand.vx v27, v8, a4
+; CHECK-NEXT:    lui a4, 4096
+; CHECK-NEXT:    vand.vx v28, v8, a5
+; CHECK-NEXT:    lui a5, 8192
+; CHECK-NEXT:    vand.vx v29, v8, a4
+; CHECK-NEXT:    lui a4, 16384
+; CHECK-NEXT:    vand.vx v30, v8, a5
+; CHECK-NEXT:    lui a5, 32768
+; CHECK-NEXT:    vand.vx v31, v8, a4
+; CHECK-NEXT:    lui a4, 65536
+; CHECK-NEXT:    vand.vx v7, v8, a5
+; CHECK-NEXT:    lui a5, 131072
+; CHECK-NEXT:    vand.vx v6, v8, a4
+; CHECK-NEXT:    lui a4, 262144
+; CHECK-NEXT:    vand.vx v5, v8, a5
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    vand.vi v4, v8, 2
+; CHECK-NEXT:    vand.vi v3, v8, 1
+; CHECK-NEXT:    vand.vi v2, v8, 4
+; CHECK-NEXT:    vand.vi v1, v8, 8
+; CHECK-NEXT:    vand.vx v0, v8, a4
+; CHECK-NEXT:    vmul.vv v4, v8, v4
+; CHECK-NEXT:    vmul.vv v3, v8, v3
+; CHECK-NEXT:    vmul.vv v2, v8, v2
+; CHECK-NEXT:    vmul.vv v1, v8, v1
+; CHECK-NEXT:    vmul.vv v9, v8, v9
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v13, v8, v13
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vmul.vv v15, v8, v15
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vmul.vv v17, v8, v17
+; CHECK-NEXT:    vmul.vv v18, v8, v18
+; CHECK-NEXT:    vmul.vv v19, v8, v19
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vmul.vv v21, v8, v21
+; CHECK-NEXT:    vmul.vv v22, v8, v22
+; CHECK-NEXT:    vmul.vv v23, v8, v23
+; CHECK-NEXT:    vmul.vv v24, v8, v24
+; CHECK-NEXT:    vmul.vv v25, v8, v25
+; CHECK-NEXT:    vmul.vv v26, v8, v26
+; CHECK-NEXT:    vmul.vv v27, v8, v27
+; CHECK-NEXT:    vmul.vv v28, v8, v28
+; CHECK-NEXT:    vmul.vv v29, v8, v29
+; CHECK-NEXT:    vmul.vv v30, v8, v30
+; CHECK-NEXT:    vmul.vv v31, v8, v31
+; CHECK-NEXT:    vmul.vv v7, v8, v7
+; CHECK-NEXT:    vmul.vv v6, v8, v6
+; CHECK-NEXT:    vmul.vv v5, v8, v5
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vand.vx v0, v8, a5
+; CHECK-NEXT:    vmul.vv v8, v8, v0
+; CHECK-NEXT:    vxor.vv v4, v3, v4
+; CHECK-NEXT:    vxor.vv v4, v4, v2
+; CHECK-NEXT:    vxor.vv v4, v4, v1
+; CHECK-NEXT:    vxor.vv v9, v4, v9
+; CHECK-NEXT:    vxor.vv v9, v9, v10
+; CHECK-NEXT:    vxor.vv v9, v9, v11
+; CHECK-NEXT:    vxor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v10, v9, v13
+; CHECK-NEXT:    vxor.vv v10, v10, v14
+; CHECK-NEXT:    vxor.vv v10, v10, v15
+; CHECK-NEXT:    vxor.vv v10, v10, v16
+; CHECK-NEXT:    vxor.vv v10, v10, v17
+; CHECK-NEXT:    vxor.vv v10, v10, v18
+; CHECK-NEXT:    vxor.vv v10, v10, v19
+; CHECK-NEXT:    vxor.vv v10, v10, v20
+; CHECK-NEXT:    vxor.vv v10, v10, v21
+; CHECK-NEXT:    vxor.vv v10, v10, v22
+; CHECK-NEXT:    vxor.vv v10, v10, v23
+; CHECK-NEXT:    vxor.vv v10, v10, v24
+; CHECK-NEXT:    vxor.vv v10, v10, v25
+; CHECK-NEXT:    vxor.vv v10, v10, v26
+; CHECK-NEXT:    vxor.vv v10, v10, v27
+; CHECK-NEXT:    vxor.vv v10, v10, v28
+; CHECK-NEXT:    vsll.vi v9, v9, 24
+; CHECK-NEXT:    vxor.vv v11, v10, v29
+; CHECK-NEXT:    vxor.vv v11, v11, v30
+; CHECK-NEXT:    vand.vx v12, v10, a3
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v11, v11, v31
+; CHECK-NEXT:    vxor.vv v11, v11, v7
+; CHECK-NEXT:    vxor.vv v11, v11, v6
+; CHECK-NEXT:    vxor.vv v11, v11, v5
+; CHECK-NEXT:    vsrl.vi v10, v10, 8
+; CHECK-NEXT:    vand.vx v10, v10, a3
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vl1r.v v12, (a3) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vxor.vv v11, v11, v12
+; CHECK-NEXT:    vxor.vv v8, v11, v8
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i32> @llvm.clmulr.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y)
+  ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @clmulr_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) nounwind {
+; CHECK-LABEL: clmulr_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a4, 16
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vsll.vi v11, v8, 24
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    lui a5, 349525
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    addi a3, a4, -256
+; CHECK-NEXT:    addi a2, a0, -241
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    addi a0, a5, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v11, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vand.vx v9, v8, a6
+; CHECK-NEXT:    li a5, 32
+; CHECK-NEXT:    vand.vx v10, v8, a5
+; CHECK-NEXT:    li a5, 64
+; CHECK-NEXT:    vand.vx v11, v8, a5
+; CHECK-NEXT:    li a5, 128
+; CHECK-NEXT:    vand.vx v12, v8, a5
+; CHECK-NEXT:    li a5, 256
+; CHECK-NEXT:    vand.vx v13, v8, a5
+; CHECK-NEXT:    li a5, 512
+; CHECK-NEXT:    vand.vx v14, v8, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:    vand.vx v15, v8, a5
+; CHECK-NEXT:    li a5, 1
+; CHECK-NEXT:    slli a5, a5, 11
+; CHECK-NEXT:    vand.vx v16, v8, a5
+; CHECK-NEXT:    lui a5, 1
+; CHECK-NEXT:    vand.vx v17, v8, a5
+; CHECK-NEXT:    lui a5, 2
+; CHECK-NEXT:    vand.vx v18, v8, a5
+; CHECK-NEXT:    lui a5, 4
+; CHECK-NEXT:    vand.vx v19, v8, a5
+; CHECK-NEXT:    lui a5, 8
+; CHECK-NEXT:    vand.vx v20, v8, a5
+; CHECK-NEXT:    lui a5, 32
+; CHECK-NEXT:    vand.vx v21, v8, a4
+; CHECK-NEXT:    lui a4, 64
+; CHECK-NEXT:    vand.vx v22, v8, a5
+; CHECK-NEXT:    lui a5, 128
+; CHECK-NEXT:    vand.vx v23, v8, a4
+; CHECK-NEXT:    lui a4, 256
+; CHECK-NEXT:    vand.vx v24, v8, a5
+; CHECK-NEXT:    lui a5, 512
+; CHECK-NEXT:    vand.vx v25, v8, a4
+; CHECK-NEXT:    lui a4, 1024
+; CHECK-NEXT:    vand.vx v26, v8, a5
+; CHECK-NEXT:    lui a5, 2048
+; CHECK-NEXT:    vand.vx v27, v8, a4
+; CHECK-NEXT:    lui a4, 4096
+; CHECK-NEXT:    vand.vx v28, v8, a5
+; CHECK-NEXT:    lui a5, 8192
+; CHECK-NEXT:    vand.vx v29, v8, a4
+; CHECK-NEXT:    lui a4, 16384
+; CHECK-NEXT:    vand.vx v30, v8, a5
+; CHECK-NEXT:    lui a5, 32768
+; CHECK-NEXT:    vand.vx v31, v8, a4
+; CHECK-NEXT:    lui a4, 65536
+; CHECK-NEXT:    vand.vx v7, v8, a5
+; CHECK-NEXT:    lui a5, 131072
+; CHECK-NEXT:    vand.vx v6, v8, a4
+; CHECK-NEXT:    lui a4, 262144
+; CHECK-NEXT:    vand.vx v5, v8, a5
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    vand.vi v4, v8, 2
+; CHECK-NEXT:    vand.vi v3, v8, 1
+; CHECK-NEXT:    vand.vi v2, v8, 4
+; CHECK-NEXT:    vand.vi v1, v8, 8
+; CHECK-NEXT:    vand.vx v0, v8, a4
+; CHECK-NEXT:    vmul.vv v4, v8, v4
+; CHECK-NEXT:    vmul.vv v3, v8, v3
+; CHECK-NEXT:    vmul.vv v2, v8, v2
+; CHECK-NEXT:    vmul.vv v1, v8, v1
+; CHECK-NEXT:    vmul.vv v9, v8, v9
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v13, v8, v13
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vmul.vv v15, v8, v15
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vmul.vv v17, v8, v17
+; CHECK-NEXT:    vmul.vv v18, v8, v18
+; CHECK-NEXT:    vmul.vv v19, v8, v19
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vmul.vv v21, v8, v21
+; CHECK-NEXT:    vmul.vv v22, v8, v22
+; CHECK-NEXT:    vmul.vv v23, v8, v23
+; CHECK-NEXT:    vmul.vv v24, v8, v24
+; CHECK-NEXT:    vmul.vv v25, v8, v25
+; CHECK-NEXT:    vmul.vv v26, v8, v26
+; CHECK-NEXT:    vmul.vv v27, v8, v27
+; CHECK-NEXT:    vmul.vv v28, v8, v28
+; CHECK-NEXT:    vmul.vv v29, v8, v29
+; CHECK-NEXT:    vmul.vv v30, v8, v30
+; CHECK-NEXT:    vmul.vv v31, v8, v31
+; CHECK-NEXT:    vmul.vv v7, v8, v7
+; CHECK-NEXT:    vmul.vv v6, v8, v6
+; CHECK-NEXT:    vmul.vv v5, v8, v5
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vand.vx v0, v8, a5
+; CHECK-NEXT:    vmul.vv v8, v8, v0
+; CHECK-NEXT:    vxor.vv v4, v3, v4
+; CHECK-NEXT:    vxor.vv v4, v4, v2
+; CHECK-NEXT:    vxor.vv v4, v4, v1
+; CHECK-NEXT:    vxor.vv v9, v4, v9
+; CHECK-NEXT:    vxor.vv v9, v9, v10
+; CHECK-NEXT:    vxor.vv v9, v9, v11
+; CHECK-NEXT:    vxor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v10, v9, v13
+; CHECK-NEXT:    vxor.vv v10, v10, v14
+; CHECK-NEXT:    vxor.vv v10, v10, v15
+; CHECK-NEXT:    vxor.vv v10, v10, v16
+; CHECK-NEXT:    vxor.vv v10, v10, v17
+; CHECK-NEXT:    vxor.vv v10, v10, v18
+; CHECK-NEXT:    vxor.vv v10, v10, v19
+; CHECK-NEXT:    vxor.vv v10, v10, v20
+; CHECK-NEXT:    vxor.vv v10, v10, v21
+; CHECK-NEXT:    vxor.vv v10, v10, v22
+; CHECK-NEXT:    vxor.vv v10, v10, v23
+; CHECK-NEXT:    vxor.vv v10, v10, v24
+; CHECK-NEXT:    vxor.vv v10, v10, v25
+; CHECK-NEXT:    vxor.vv v10, v10, v26
+; CHECK-NEXT:    vxor.vv v10, v10, v27
+; CHECK-NEXT:    vxor.vv v10, v10, v28
+; CHECK-NEXT:    vsll.vi v9, v9, 24
+; CHECK-NEXT:    vxor.vv v11, v10, v29
+; CHECK-NEXT:    vxor.vv v11, v11, v30
+; CHECK-NEXT:    vand.vx v12, v10, a3
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v11, v11, v31
+; CHECK-NEXT:    vxor.vv v11, v11, v7
+; CHECK-NEXT:    vxor.vv v11, v11, v6
+; CHECK-NEXT:    vxor.vv v11, v11, v5
+; CHECK-NEXT:    vsrl.vi v10, v10, 8
+; CHECK-NEXT:    vand.vx v10, v10, a3
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vl1r.v v12, (a3) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vxor.vv v11, v11, v12
+; CHECK-NEXT:    vxor.vv v8, v11, v8
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.clmulr.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
+  ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @clmulr_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) nounwind {
+; RV32-LABEL: clmulr_nxv4i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -64
+; RV32-NEXT:    sw s0, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v8, 8
+; RV32-NEXT:    lui a0, 16
+; RV32-NEXT:    vsrl.vi v12, v8, 24
+; RV32-NEXT:    vsll.vi v14, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    lui a2, 209715
+; RV32-NEXT:    lui s6, 349525
+; RV32-NEXT:    li t2, 16
+; RV32-NEXT:    li t5, 32
+; RV32-NEXT:    li s2, 64
+; RV32-NEXT:    li s5, 128
+; RV32-NEXT:    li s4, 256
+; RV32-NEXT:    li s3, 512
+; RV32-NEXT:    li s1, 1024
+; RV32-NEXT:    li s0, 1
+; RV32-NEXT:    lui t6, 1
+; RV32-NEXT:    lui t4, 2
+; RV32-NEXT:    lui t3, 4
+; RV32-NEXT:    lui a5, 8
+; RV32-NEXT:    lui a6, 32
+; RV32-NEXT:    lui a7, 64
+; RV32-NEXT:    lui t0, 128
+; RV32-NEXT:    lui t1, 256
+; RV32-NEXT:    addi a4, a0, -256
+; RV32-NEXT:    addi a3, a1, -241
+; RV32-NEXT:    addi a2, a2, 819
+; RV32-NEXT:    addi a1, s6, 1365
+; RV32-NEXT:    vand.vx v10, v10, a4
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vor.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v14, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v10, v10, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v10, v10, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v10, v10, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vand.vx v10, v8, t2
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    vand.vx v12, v8, t5
+; RV32-NEXT:    lui t5, 1024
+; RV32-NEXT:    vand.vx v14, v8, s2
+; RV32-NEXT:    lui s2, 2048
+; RV32-NEXT:    vand.vx v16, v8, s5
+; RV32-NEXT:    lui s5, 4096
+; RV32-NEXT:    vand.vx v26, v8, s4
+; RV32-NEXT:    lui s4, 8192
+; RV32-NEXT:    vand.vx v28, v8, s3
+; RV32-NEXT:    lui s3, 16384
+; RV32-NEXT:    vand.vx v18, v8, s1
+; RV32-NEXT:    lui s1, 32768
+; RV32-NEXT:    slli s0, s0, 11
+; RV32-NEXT:    vand.vx v20, v8, s0
+; RV32-NEXT:    lui s0, 65536
+; RV32-NEXT:    vand.vx v22, v8, t6
+; RV32-NEXT:    lui t6, 131072
+; RV32-NEXT:    vand.vx v24, v8, t4
+; RV32-NEXT:    lui t4, 262144
+; RV32-NEXT:    vand.vx v30, v8, t3
+; RV32-NEXT:    lui t3, 524288
+; RV32-NEXT:    vand.vi v6, v8, 2
+; RV32-NEXT:    vand.vi v4, v8, 1
+; RV32-NEXT:    vand.vi v2, v8, 4
+; RV32-NEXT:    vand.vi v0, v8, 8
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v6, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v6, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v6, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v26
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v18
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v22
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v24
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    slli s6, s6, 2
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 32
+; RV32-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v30
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    slli s6, s6, 3
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 32
+; RV32-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    mv s6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, s6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 32
+; RV32-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a7
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t0
+; RV32-NEXT:    vmul.vv v6, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t1
+; RV32-NEXT:    vmul.vv v30, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t2
+; RV32-NEXT:    vmul.vv v28, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t5
+; RV32-NEXT:    vmul.vv v26, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s2
+; RV32-NEXT:    vmul.vv v22, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s5
+; RV32-NEXT:    vmul.vv v18, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s4
+; RV32-NEXT:    vmul.vv v16, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s3
+; RV32-NEXT:    vmul.vv v24, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s1
+; RV32-NEXT:    vmul.vv v20, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s0
+; RV32-NEXT:    vmul.vv v12, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vand.vx v14, v8, t4
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vand.vx v0, v8, t3
+; RV32-NEXT:    vmul.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v4, v2, v4
+; RV32-NEXT:    vxor.vv v6, v4, v6
+; RV32-NEXT:    vxor.vv v30, v6, v30
+; RV32-NEXT:    vxor.vv v28, v30, v28
+; RV32-NEXT:    vxor.vv v26, v28, v26
+; RV32-NEXT:    vxor.vv v22, v26, v22
+; RV32-NEXT:    vsll.vi v26, v0, 24
+; RV32-NEXT:    vxor.vv v18, v22, v18
+; RV32-NEXT:    vxor.vv v16, v18, v16
+; RV32-NEXT:    vand.vx v18, v22, a4
+; RV32-NEXT:    vsll.vi v18, v18, 8
+; RV32-NEXT:    vor.vv v18, v26, v18
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    vxor.vv v16, v16, v20
+; RV32-NEXT:    vxor.vv v12, v16, v12
+; RV32-NEXT:    vxor.vv v10, v12, v10
+; RV32-NEXT:    vsrl.vi v12, v22, 8
+; RV32-NEXT:    vand.vx v12, v12, a4
+; RV32-NEXT:    vxor.vv v10, v10, v14
+; RV32-NEXT:    vxor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v8, v8, 24
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vor.vv v8, v18, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v10, v10, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v10, v10, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v10, v10, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 64
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv4i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -96
+; RV64-NEXT:    sd s0, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vsrl.vi v10, v8, 8
+; RV64-NEXT:    lui a0, 16
+; RV64-NEXT:    vsrl.vi v12, v8, 24
+; RV64-NEXT:    vsll.vi v14, v8, 24
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    lui a2, 209715
+; RV64-NEXT:    lui s6, 349525
+; RV64-NEXT:    li t2, 16
+; RV64-NEXT:    li t5, 32
+; RV64-NEXT:    li s2, 64
+; RV64-NEXT:    li s5, 128
+; RV64-NEXT:    li s4, 256
+; RV64-NEXT:    li s3, 512
+; RV64-NEXT:    li s1, 1024
+; RV64-NEXT:    li s0, 1
+; RV64-NEXT:    lui t6, 1
+; RV64-NEXT:    lui t4, 2
+; RV64-NEXT:    lui t3, 4
+; RV64-NEXT:    lui a5, 8
+; RV64-NEXT:    lui a6, 32
+; RV64-NEXT:    lui a7, 64
+; RV64-NEXT:    lui t0, 128
+; RV64-NEXT:    lui t1, 256
+; RV64-NEXT:    addi a4, a0, -256
+; RV64-NEXT:    addi a3, a1, -241
+; RV64-NEXT:    addi a2, a2, 819
+; RV64-NEXT:    addi a1, s6, 1365
+; RV64-NEXT:    vand.vx v10, v10, a4
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vor.vv v10, v10, v12
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v10, v10, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v10, v10, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vand.vx v10, v8, t2
+; RV64-NEXT:    lui t2, 512
+; RV64-NEXT:    vand.vx v12, v8, t5
+; RV64-NEXT:    lui t5, 1024
+; RV64-NEXT:    vand.vx v14, v8, s2
+; RV64-NEXT:    lui s2, 2048
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v26, v8, s4
+; RV64-NEXT:    lui s4, 8192
+; RV64-NEXT:    vand.vx v28, v8, s3
+; RV64-NEXT:    lui s3, 16384
+; RV64-NEXT:    vand.vx v18, v8, s1
+; RV64-NEXT:    lui s1, 32768
+; RV64-NEXT:    slli s0, s0, 11
+; RV64-NEXT:    vand.vx v20, v8, s0
+; RV64-NEXT:    lui s0, 65536
+; RV64-NEXT:    vand.vx v22, v8, t6
+; RV64-NEXT:    lui t6, 131072
+; RV64-NEXT:    vand.vx v24, v8, t4
+; RV64-NEXT:    lui t4, 262144
+; RV64-NEXT:    vand.vx v30, v8, t3
+; RV64-NEXT:    lui t3, 524288
+; RV64-NEXT:    vand.vi v6, v8, 2
+; RV64-NEXT:    vand.vi v4, v8, 1
+; RV64-NEXT:    vand.vi v2, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v6, v8, v6
+; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v14
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v26
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v18
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v20
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v22
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v24
+; RV64-NEXT:    csrr s6, vlenb
+; RV64-NEXT:    slli s6, s6, 1
+; RV64-NEXT:    mv a0, s6
+; RV64-NEXT:    slli s6, s6, 2
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s6, sp, s6
+; RV64-NEXT:    addi s6, s6, 32
+; RV64-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v30
+; RV64-NEXT:    csrr s6, vlenb
+; RV64-NEXT:    slli s6, s6, 3
+; RV64-NEXT:    add s6, sp, s6
+; RV64-NEXT:    addi s6, s6, 32
+; RV64-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a6
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a7
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, t0
+; RV64-NEXT:    vmul.vv v6, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t1
+; RV64-NEXT:    vmul.vv v30, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t2
+; RV64-NEXT:    vmul.vv v28, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t5
+; RV64-NEXT:    vmul.vv v26, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s2
+; RV64-NEXT:    vmul.vv v22, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v18, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s4
+; RV64-NEXT:    vmul.vv v16, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s3
+; RV64-NEXT:    vmul.vv v24, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s1
+; RV64-NEXT:    vmul.vv v20, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s0
+; RV64-NEXT:    vmul.vv v12, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t6
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vand.vx v14, v8, t4
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vand.vx v0, v8, t3
+; RV64-NEXT:    vmul.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v4, v2, v4
+; RV64-NEXT:    vxor.vv v6, v4, v6
+; RV64-NEXT:    vxor.vv v30, v6, v30
+; RV64-NEXT:    vxor.vv v28, v30, v28
+; RV64-NEXT:    vxor.vv v26, v28, v26
+; RV64-NEXT:    vxor.vv v22, v26, v22
+; RV64-NEXT:    vsll.vi v26, v0, 24
+; RV64-NEXT:    vxor.vv v18, v22, v18
+; RV64-NEXT:    vxor.vv v16, v18, v16
+; RV64-NEXT:    vand.vx v18, v22, a4
+; RV64-NEXT:    vsll.vi v18, v18, 8
+; RV64-NEXT:    vor.vv v18, v26, v18
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vxor.vv v12, v16, v12
+; RV64-NEXT:    vxor.vv v10, v12, v10
+; RV64-NEXT:    vsrl.vi v12, v22, 8
+; RV64-NEXT:    vand.vx v12, v12, a4
+; RV64-NEXT:    vxor.vv v10, v10, v14
+; RV64-NEXT:    vxor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v8, v8, 24
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vor.vv v8, v18, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v10, v10, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v10, v10, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x i32> @llvm.clmulr.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @clmulr_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) nounwind {
+; RV32-LABEL: clmulr_nxv8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -80
+; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v8, 8
+; RV32-NEXT:    lui a5, 16
+; RV32-NEXT:    vsrl.vi v16, v8, 24
+; RV32-NEXT:    vsll.vi v20, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    lui a2, 209715
+; RV32-NEXT:    lui ra, 349525
+; RV32-NEXT:    li s9, 16
+; RV32-NEXT:    li s8, 32
+; RV32-NEXT:    li s6, 64
+; RV32-NEXT:    li a7, 512
+; RV32-NEXT:    li t0, 1024
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    lui t1, 1
+; RV32-NEXT:    lui t2, 2
+; RV32-NEXT:    lui t3, 4
+; RV32-NEXT:    lui t4, 8
+; RV32-NEXT:    lui t5, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui s0, 128
+; RV32-NEXT:    lui s1, 256
+; RV32-NEXT:    lui s2, 512
+; RV32-NEXT:    lui s3, 1024
+; RV32-NEXT:    lui s4, 2048
+; RV32-NEXT:    lui s5, 4096
+; RV32-NEXT:    lui s7, 8192
+; RV32-NEXT:    lui s10, 16384
+; RV32-NEXT:    lui s11, 32768
+; RV32-NEXT:    addi a4, a5, -256
+; RV32-NEXT:    addi a3, a1, -241
+; RV32-NEXT:    addi a2, a2, 819
+; RV32-NEXT:    addi a1, ra, 1365
+; RV32-NEXT:    vand.vx v12, v12, a4
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v20, v8
+; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v12, v12, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v12, v12, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v12, v12, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vand.vx v12, v8, s9
+; RV32-NEXT:    lui s9, 65536
+; RV32-NEXT:    vand.vx v16, v8, s8
+; RV32-NEXT:    lui s8, 131072
+; RV32-NEXT:    vand.vx v20, v8, s6
+; RV32-NEXT:    lui s6, 262144
+; RV32-NEXT:    slli ra, a0, 11
+; RV32-NEXT:    vand.vi v24, v8, 2
+; RV32-NEXT:    vand.vi v28, v8, 1
+; RV32-NEXT:    vand.vi v4, v8, 4
+; RV32-NEXT:    vand.vi v0, v8, 8
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    vand.vx v12, v8, a6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a6, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a6, a6, a4
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, a4, a6
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a6, 256
+; RV32-NEXT:    vand.vx v12, v8, a6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv a4, a6
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    add a6, a6, a4
+; RV32-NEXT:    lw a4, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, a7
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 6
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, ra
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, a5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 5
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a6, a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 4
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    addi a5, sp, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s7
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vand.vx v16, v8, s10
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s11
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s9
+; RV32-NEXT:    vmul.vv v28, v8, v16
+; RV32-NEXT:    vand.vx v16, v8, s8
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vand.vx v20, v8, s6
+; RV32-NEXT:    vmul.vv v4, v8, v20
+; RV32-NEXT:    vand.vx v20, v8, a0
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v0, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    vxor.vv v12, v24, v12
+; RV32-NEXT:    vsll.vi v8, v8, 24
+; RV32-NEXT:    vand.vx v24, v0, a4
+; RV32-NEXT:    vsll.vi v24, v24, 8
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v12, v12, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v12, v12, v24
+; RV32-NEXT:    vxor.vv v12, v12, v28
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vsrl.vi v16, v0, 8
+; RV32-NEXT:    vand.vx v16, v16, a4
+; RV32-NEXT:    vxor.vv v12, v12, v4
+; RV32-NEXT:    vxor.vv v12, v12, v20
+; RV32-NEXT:    vsrl.vi v12, v12, 24
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v12, v12, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v12, v12, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v12, v12, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 80
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -144
+; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    lui a5, 16
+; RV64-NEXT:    vsrl.vi v16, v8, 24
+; RV64-NEXT:    vsll.vi v20, v8, 24
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    lui a2, 209715
+; RV64-NEXT:    lui ra, 349525
+; RV64-NEXT:    li s9, 16
+; RV64-NEXT:    li s8, 32
+; RV64-NEXT:    li s6, 64
+; RV64-NEXT:    li a7, 512
+; RV64-NEXT:    li t0, 1024
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    lui t1, 1
+; RV64-NEXT:    lui t2, 2
+; RV64-NEXT:    lui t3, 4
+; RV64-NEXT:    lui t4, 8
+; RV64-NEXT:    lui t5, 32
+; RV64-NEXT:    lui t6, 64
+; RV64-NEXT:    lui s0, 128
+; RV64-NEXT:    lui s1, 256
+; RV64-NEXT:    lui s2, 512
+; RV64-NEXT:    lui s3, 1024
+; RV64-NEXT:    lui s4, 2048
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    lui s7, 8192
+; RV64-NEXT:    lui s10, 16384
+; RV64-NEXT:    lui s11, 32768
+; RV64-NEXT:    addi a4, a5, -256
+; RV64-NEXT:    addi a3, a1, -241
+; RV64-NEXT:    addi a2, a2, 819
+; RV64-NEXT:    addi a1, ra, 1365
+; RV64-NEXT:    vand.vx v12, v12, a4
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vor.vv v12, v12, v16
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v20, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v12, v12, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v12, v12, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vand.vx v12, v8, s9
+; RV64-NEXT:    lui s9, 65536
+; RV64-NEXT:    vand.vx v16, v8, s8
+; RV64-NEXT:    lui s8, 131072
+; RV64-NEXT:    vand.vx v20, v8, s6
+; RV64-NEXT:    lui s6, 262144
+; RV64-NEXT:    slli ra, a0, 11
+; RV64-NEXT:    vand.vi v24, v8, 2
+; RV64-NEXT:    vand.vi v28, v8, 1
+; RV64-NEXT:    vand.vi v4, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    sd a4, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v20
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui a0, 524288
+; RV64-NEXT:    li a6, 128
+; RV64-NEXT:    vand.vx v12, v8, a6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 2
+; RV64-NEXT:    mv a6, a4
+; RV64-NEXT:    slli a4, a4, 1
+; RV64-NEXT:    add a6, a6, a4
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, a4, a6
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 32
+; RV64-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li a6, 256
+; RV64-NEXT:    vand.vx v12, v8, a6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    mv a4, a6
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    add a6, a6, a4
+; RV64-NEXT:    ld a4, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a7
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 4
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 6
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, ra
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 4
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a6, a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    addi a5, sp, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s7
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vand.vx v16, v8, s10
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s11
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s9
+; RV64-NEXT:    vmul.vv v28, v8, v16
+; RV64-NEXT:    vand.vx v16, v8, s8
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    vand.vx v20, v8, s6
+; RV64-NEXT:    vmul.vv v4, v8, v20
+; RV64-NEXT:    vand.vx v20, v8, a0
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v0, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    vxor.vv v12, v24, v12
+; RV64-NEXT:    vsll.vi v8, v8, 24
+; RV64-NEXT:    vand.vx v24, v0, a4
+; RV64-NEXT:    vsll.vi v24, v24, 8
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    vxor.vv v12, v12, v28
+; RV64-NEXT:    vxor.vv v12, v12, v16
+; RV64-NEXT:    vsrl.vi v16, v0, 8
+; RV64-NEXT:    vand.vx v16, v16, a4
+; RV64-NEXT:    vxor.vv v12, v12, v4
+; RV64-NEXT:    vxor.vv v12, v12, v20
+; RV64-NEXT:    vsrl.vi v12, v12, 24
+; RV64-NEXT:    vor.vv v12, v16, v12
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v12, v12, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v12, v12, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 144
+; RV64-NEXT:    ret
+  %a = call <vscale x 8 x i32> @llvm.clmulr.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %x)
+  ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 16 x i32> @clmulr_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) nounwind {
+; RV32-LABEL: clmulr_nxv16i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -80
+; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    lui a5, 16
+; RV32-NEXT:    vsrl.vi v24, v8, 24
+; RV32-NEXT:    vsll.vi v0, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    lui a2, 209715
+; RV32-NEXT:    lui ra, 349525
+; RV32-NEXT:    li t5, 16
+; RV32-NEXT:    li t2, 32
+; RV32-NEXT:    li a7, 64
+; RV32-NEXT:    li t0, 512
+; RV32-NEXT:    li t1, 1024
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    lui t3, 1
+; RV32-NEXT:    lui t4, 2
+; RV32-NEXT:    lui t6, 4
+; RV32-NEXT:    lui s0, 8
+; RV32-NEXT:    lui s1, 32
+; RV32-NEXT:    lui s2, 64
+; RV32-NEXT:    lui s3, 128
+; RV32-NEXT:    lui s4, 256
+; RV32-NEXT:    lui s5, 512
+; RV32-NEXT:    lui s6, 1024
+; RV32-NEXT:    lui s7, 2048
+; RV32-NEXT:    lui s8, 4096
+; RV32-NEXT:    lui s9, 8192
+; RV32-NEXT:    lui s10, 16384
+; RV32-NEXT:    lui s11, 32768
+; RV32-NEXT:    addi a4, a5, -256
+; RV32-NEXT:    addi a3, a1, -241
+; RV32-NEXT:    addi a2, a2, 819
+; RV32-NEXT:    addi a1, ra, 1365
+; RV32-NEXT:    slli a0, a0, 11
+; RV32-NEXT:    vand.vx v16, v16, a4
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vor.vv v16, v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v0, v8
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v16, v16, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v16, v16, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v16, v16, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vand.vi v16, v8, 2
+; RV32-NEXT:    vand.vi v24, v8, 1
+; RV32-NEXT:    vand.vi v0, v8, 4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vand.vi v16, v8, 8
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui ra, 65536
+; RV32-NEXT:    vand.vx v16, v8, t5
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui t5, 131072
+; RV32-NEXT:    vand.vx v16, v8, t2
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui t2, 262144
+; RV32-NEXT:    vand.vx v16, v8, a7
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    vand.vx v16, v8, a6
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a6, 256
+; RV32-NEXT:    vand.vx v16, v8, a6
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv a0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    mv t0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add t0, t0, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, t0
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t1
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv t0, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add t0, t0, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, t0
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t3
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a6, a6, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t6
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, a5
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s1
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s2
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s3
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s5
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s6
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s7
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s8
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s9
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s10
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s11
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, ra
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t5
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t2
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, a7
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v24, v8
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v16, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    vsll.vi v16, v16, 24
+; RV32-NEXT:    vand.vx v24, v8, a4
+; RV32-NEXT:    vsll.vi v24, v24, 8
+; RV32-NEXT:    vor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v24, v24, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 8
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vsrl.vi v24, v24, 24
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v16, v16, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v16, v16, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v16, v16, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 80
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv16i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -144
+; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV64-NEXT:    vsrl.vi v16, v8, 8
+; RV64-NEXT:    lui a5, 16
+; RV64-NEXT:    vsrl.vi v24, v8, 24
+; RV64-NEXT:    vsll.vi v0, v8, 24
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    lui a2, 209715
+; RV64-NEXT:    lui ra, 349525
+; RV64-NEXT:    li t5, 16
+; RV64-NEXT:    li t2, 32
+; RV64-NEXT:    li a7, 64
+; RV64-NEXT:    li t0, 512
+; RV64-NEXT:    li t1, 1024
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    lui t3, 1
+; RV64-NEXT:    lui t4, 2
+; RV64-NEXT:    lui t6, 4
+; RV64-NEXT:    lui s0, 8
+; RV64-NEXT:    lui s1, 32
+; RV64-NEXT:    lui s2, 64
+; RV64-NEXT:    lui s3, 128
+; RV64-NEXT:    lui s4, 256
+; RV64-NEXT:    lui s5, 512
+; RV64-NEXT:    lui s6, 1024
+; RV64-NEXT:    lui s7, 2048
+; RV64-NEXT:    lui s8, 4096
+; RV64-NEXT:    lui s9, 8192
+; RV64-NEXT:    lui s10, 16384
+; RV64-NEXT:    lui s11, 32768
+; RV64-NEXT:    addi a4, a5, -256
+; RV64-NEXT:    addi a3, a1, -241
+; RV64-NEXT:    addi a2, a2, 819
+; RV64-NEXT:    addi a1, ra, 1365
+; RV64-NEXT:    slli a0, a0, 11
+; RV64-NEXT:    vand.vx v16, v16, a4
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v0, v8
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v16, v16, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v16, v16, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vand.vi v16, v8, 2
+; RV64-NEXT:    vand.vi v24, v8, 1
+; RV64-NEXT:    vand.vi v0, v8, 4
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vand.vi v16, v8, 8
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui ra, 65536
+; RV64-NEXT:    vand.vx v16, v8, t5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui t5, 131072
+; RV64-NEXT:    vand.vx v16, v8, t2
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui t2, 262144
+; RV64-NEXT:    vand.vx v16, v8, a7
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui a7, 524288
+; RV64-NEXT:    li a6, 128
+; RV64-NEXT:    vand.vx v16, v8, a6
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li a6, 256
+; RV64-NEXT:    vand.vx v16, v8, a6
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    mv a0, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t0
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 4
+; RV64-NEXT:    mv t0, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add t0, t0, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a6, a6, t0
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    mv t0, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add t0, t0, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a6, a6, t0
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t3
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a6, a6, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t4
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t6
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a6, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s0
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 7
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s2
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s3
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s4
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s6
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s7
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s8
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s9
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s10
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s11
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, ra
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t2
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a7
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v24, v8
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v16, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 7
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    vsll.vi v16, v16, 24
+; RV64-NEXT:    vand.vx v24, v8, a4
+; RV64-NEXT:    vsll.vi v24, v24, 8
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vsrl.vi v8, v8, 8
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vsrl.vi v24, v24, 24
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v16, v16, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v16, v16, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 144
+; RV64-NEXT:    ret
+  %a = call <vscale x 16 x i32> @llvm.clmulr.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
+  ret <vscale x 16 x i32> %a
+}
+
+define <vscale x 1 x i64> @clmulr_nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s7, 1044480
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    li s11, 1
+; RV32-NEXT:    li s8, 2
+; RV32-NEXT:    li s9, 4
+; RV32-NEXT:    li s10, 8
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    li a5, 64
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    li ra, 256
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    li a1, 1024
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    lui t0, 2
+; RV32-NEXT:    lui t1, 4
+; RV32-NEXT:    lui t2, 8
+; RV32-NEXT:    lui t3, 16
+; RV32-NEXT:    lui t4, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t6, 128
+; RV32-NEXT:    lui s0, 256
+; RV32-NEXT:    lui s1, 512
+; RV32-NEXT:    lui s2, 1024
+; RV32-NEXT:    lui s3, 2048
+; RV32-NEXT:    lui s4, 4096
+; RV32-NEXT:    lui s5, 8192
+; RV32-NEXT:    lui s6, 16384
+; RV32-NEXT:    sw s7, 272(sp)
+; RV32-NEXT:    lui s7, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw a7, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw s11, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw s8, 252(sp)
+; RV32-NEXT:    lui s8, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s9, 244(sp)
+; RV32-NEXT:    lui s9, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s10, 236(sp)
+; RV32-NEXT:    lui s10, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw a3, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw a4, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw a5, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw a6, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw ra, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw a0, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a1, 180(sp)
+; RV32-NEXT:    slli s11, s11, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw s11, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a2, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw t0, 156(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw t1, 148(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw t2, 140(sp)
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t3, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t4, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t5, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t6, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s0, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s1, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s2, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s3, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s4, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s5, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s6, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s7, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw s8, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw s9, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw s10, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw a7, 12(sp)
+; RV32-NEXT:    lui a0, 61681
+; RV32-NEXT:    addi a0, a0, -241
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v3, a0
+; RV32-NEXT:    lui a0, 209715
+; RV32-NEXT:    addi a0, a0, 819
+; RV32-NEXT:    vmv.v.x v2, a0
+; RV32-NEXT:    lui a0, 349525
+; RV32-NEXT:    addi a0, a0, 1365
+; RV32-NEXT:    vmv.v.x v1, a0
+; RV32-NEXT:    addi a0, sp, 272
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v0, (a0), zero
+; RV32-NEXT:    addi a0, sp, 264
+; RV32-NEXT:    vlse64.v v13, (a0), zero
+; RV32-NEXT:    addi a0, sp, 256
+; RV32-NEXT:    vlse64.v v14, (a0), zero
+; RV32-NEXT:    addi a0, sp, 248
+; RV32-NEXT:    vlse64.v v15, (a0), zero
+; RV32-NEXT:    addi a0, sp, 240
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v17, (a0), zero
+; RV32-NEXT:    addi a0, sp, 224
+; RV32-NEXT:    vlse64.v v18, (a0), zero
+; RV32-NEXT:    addi a0, sp, 216
+; RV32-NEXT:    vlse64.v v19, (a0), zero
+; RV32-NEXT:    addi a0, sp, 208
+; RV32-NEXT:    vlse64.v v20, (a0), zero
+; RV32-NEXT:    addi a0, sp, 200
+; RV32-NEXT:    vlse64.v v21, (a0), zero
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vlse64.v v22, (a0), zero
+; RV32-NEXT:    addi a0, sp, 184
+; RV32-NEXT:    vlse64.v v23, (a0), zero
+; RV32-NEXT:    addi a0, sp, 176
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    addi a0, sp, 168
+; RV32-NEXT:    vlse64.v v25, (a0), zero
+; RV32-NEXT:    addi a0, sp, 160
+; RV32-NEXT:    vlse64.v v26, (a0), zero
+; RV32-NEXT:    addi a0, sp, 152
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    addi a0, sp, 144
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    addi a0, sp, 136
+; RV32-NEXT:    vlse64.v v29, (a0), zero
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    vlse64.v v30, (a0), zero
+; RV32-NEXT:    addi a0, sp, 120
+; RV32-NEXT:    vlse64.v v31, (a0), zero
+; RV32-NEXT:    addi a0, sp, 112
+; RV32-NEXT:    vlse64.v v11, (a0), zero
+; RV32-NEXT:    addi a0, sp, 104
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    vlse64.v v5, (a0), zero
+; RV32-NEXT:    addi a0, sp, 88
+; RV32-NEXT:    vlse64.v v4, (a0), zero
+; RV32-NEXT:    li a6, 56
+; RV32-NEXT:    vsrl.vi v27, v8, 24
+; RV32-NEXT:    vsrl.vx v28, v8, a6
+; RV32-NEXT:    li ra, 40
+; RV32-NEXT:    vsrl.vx v7, v8, ra
+; RV32-NEXT:    vsll.vx v6, v8, a6
+; RV32-NEXT:    addi a4, t3, -256
+; RV32-NEXT:    vand.vx v7, v7, a4
+; RV32-NEXT:    vor.vv v28, v7, v28
+; RV32-NEXT:    vand.vx v7, v8, a4
+; RV32-NEXT:    vsll.vx v7, v7, ra
+; RV32-NEXT:    vor.vv v7, v6, v7
+; RV32-NEXT:    vsrl.vi v6, v8, 8
+; RV32-NEXT:    lui a5, 4080
+; RV32-NEXT:    vand.vx v27, v27, a5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v6, v6, v0
+; RV32-NEXT:    vor.vv v27, v6, v27
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    vlse64.v v6, (a3), zero
+; RV32-NEXT:    vor.vv v27, v27, v28
+; RV32-NEXT:    vand.vx v28, v8, a5
+; RV32-NEXT:    vsll.vi v28, v28, 24
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v28, v8
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    vlse64.v v28, (a3), zero
+; RV32-NEXT:    vor.vv v8, v7, v8
+; RV32-NEXT:    addi a3, sp, 64
+; RV32-NEXT:    vlse64.v v7, (a3), zero
+; RV32-NEXT:    vor.vv v8, v8, v27
+; RV32-NEXT:    vsrl.vi v27, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v3
+; RV32-NEXT:    vand.vv v27, v27, v3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    vsrl.vi v27, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v2
+; RV32-NEXT:    vand.vv v27, v27, v2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    vsrl.vi v27, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v1
+; RV32-NEXT:    vand.vv v27, v27, v1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    addi a3, sp, 56
+; RV32-NEXT:    vlse64.v v27, (a3), zero
+; RV32-NEXT:    vand.vv v13, v8, v13
+; RV32-NEXT:    vand.vv v14, v8, v14
+; RV32-NEXT:    vand.vv v15, v8, v15
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    vand.vv v17, v8, v17
+; RV32-NEXT:    vand.vv v18, v8, v18
+; RV32-NEXT:    vand.vv v19, v8, v19
+; RV32-NEXT:    vand.vv v20, v8, v20
+; RV32-NEXT:    vand.vv v21, v8, v21
+; RV32-NEXT:    vand.vv v22, v8, v22
+; RV32-NEXT:    vand.vv v23, v8, v23
+; RV32-NEXT:    vand.vv v24, v8, v24
+; RV32-NEXT:    vand.vv v25, v8, v25
+; RV32-NEXT:    vand.vv v26, v8, v26
+; RV32-NEXT:    vand.vv v3, v8, v9
+; RV32-NEXT:    vand.vv v2, v8, v10
+; RV32-NEXT:    vand.vv v29, v8, v29
+; RV32-NEXT:    vand.vv v30, v8, v30
+; RV32-NEXT:    vand.vv v31, v8, v31
+; RV32-NEXT:    vand.vv v0, v8, v11
+; RV32-NEXT:    vand.vv v9, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v5, v8, v5
+; RV32-NEXT:    vand.vv v4, v8, v4
+; RV32-NEXT:    vand.vv v6, v8, v6
+; RV32-NEXT:    vand.vv v9, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi a3, sp, 48
+; RV32-NEXT:    addi a0, sp, 40
+; RV32-NEXT:    vlse64.v v9, (a3), zero
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vand.vv v11, v8, v7
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v11, v8, v27
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi a2, sp, 32
+; RV32-NEXT:    addi a3, sp, 24
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a2), zero
+; RV32-NEXT:    vlse64.v v10, (a3), zero
+; RV32-NEXT:    vlse64.v v11, (a1), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vand.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 8
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s11
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t3
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t5
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t6
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s3
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s5
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s6
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s7
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s8
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s9
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v1, v8, s10
+; RV32-NEXT:    vmul.vv v1, v8, v1
+; RV32-NEXT:    vmul.vv v9, v8, v13
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v14
+; RV32-NEXT:    vmul.vv v11, v8, v15
+; RV32-NEXT:    vmul.vv v12, v8, v16
+; RV32-NEXT:    vmul.vv v13, v8, v17
+; RV32-NEXT:    vmul.vv v14, v8, v18
+; RV32-NEXT:    vmul.vv v15, v8, v19
+; RV32-NEXT:    vmul.vv v16, v8, v20
+; RV32-NEXT:    vmul.vv v17, v8, v21
+; RV32-NEXT:    vmul.vv v18, v8, v22
+; RV32-NEXT:    vmul.vv v19, v8, v23
+; RV32-NEXT:    vmul.vv v20, v8, v24
+; RV32-NEXT:    vmul.vv v21, v8, v25
+; RV32-NEXT:    vmul.vv v22, v8, v26
+; RV32-NEXT:    vmul.vv v23, v8, v3
+; RV32-NEXT:    vmul.vv v24, v8, v2
+; RV32-NEXT:    vmul.vv v25, v8, v29
+; RV32-NEXT:    vmul.vv v26, v8, v30
+; RV32-NEXT:    vmul.vv v27, v8, v31
+; RV32-NEXT:    vmul.vv v28, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v29, v8, v29
+; RV32-NEXT:    vmul.vv v30, v8, v5
+; RV32-NEXT:    vmul.vv v31, v8, v4
+; RV32-NEXT:    vmul.vv v7, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v5, v8, v5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v3, v8, v3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vxor.vv v8, v8, v1
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vxor.vv v8, v8, v11
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v13
+; RV32-NEXT:    vxor.vv v8, v8, v14
+; RV32-NEXT:    vxor.vv v8, v8, v15
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v17
+; RV32-NEXT:    vxor.vv v8, v8, v18
+; RV32-NEXT:    vxor.vv v8, v8, v19
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v21
+; RV32-NEXT:    vxor.vv v8, v8, v22
+; RV32-NEXT:    vxor.vv v8, v8, v23
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v25
+; RV32-NEXT:    vxor.vv v8, v8, v26
+; RV32-NEXT:    vxor.vv v8, v8, v27
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v29
+; RV32-NEXT:    vxor.vv v8, v8, v30
+; RV32-NEXT:    vxor.vv v8, v8, v31
+; RV32-NEXT:    vxor.vv v8, v8, v7
+; RV32-NEXT:    vxor.vv v8, v8, v6
+; RV32-NEXT:    vxor.vv v8, v8, v5
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v3
+; RV32-NEXT:    vxor.vv v8, v8, v2
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vx v9, v8, a6
+; RV32-NEXT:    vsll.vx v10, v8, a6
+; RV32-NEXT:    vsrl.vx v11, v8, ra
+; RV32-NEXT:    vand.vx v12, v8, a4
+; RV32-NEXT:    vand.vx v11, v11, a4
+; RV32-NEXT:    vsrl.vi v13, v8, 24
+; RV32-NEXT:    vand.vx v14, v8, a5
+; RV32-NEXT:    vand.vx v13, v13, a5
+; RV32-NEXT:    vsll.vx v12, v12, ra
+; RV32-NEXT:    vsrl.vi v15, v8, 8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v15, v15, v16
+; RV32-NEXT:    vor.vv v9, v11, v9
+; RV32-NEXT:    vor.vv v11, v15, v13
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vsll.vi v13, v14, 24
+; RV32-NEXT:    vor.vv v8, v13, v8
+; RV32-NEXT:    vor.vv v10, v10, v12
+; RV32-NEXT:    vor.vv v9, v11, v9
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vor.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vi v9, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -224
+; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT:    vsrl.vi v10, v8, 24
+; RV64-NEXT:    vsrl.vi v9, v8, 8
+; RV64-NEXT:    li t2, 255
+; RV64-NEXT:    lui t6, 61681
+; RV64-NEXT:    lui s0, 209715
+; RV64-NEXT:    lui s1, 349525
+; RV64-NEXT:    li s10, 16
+; RV64-NEXT:    li s9, 32
+; RV64-NEXT:    li s8, 64
+; RV64-NEXT:    li s5, 128
+; RV64-NEXT:    li s6, 256
+; RV64-NEXT:    li t5, 512
+; RV64-NEXT:    li t3, 1024
+; RV64-NEXT:    li t0, 1
+; RV64-NEXT:    lui s7, 1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    lui t4, 4
+; RV64-NEXT:    lui t1, 8
+; RV64-NEXT:    lui a7, 32
+; RV64-NEXT:    lui a6, 64
+; RV64-NEXT:    lui a5, 128
+; RV64-NEXT:    lui a4, 256
+; RV64-NEXT:    lui a3, 512
+; RV64-NEXT:    lui a2, 1024
+; RV64-NEXT:    li s11, 56
+; RV64-NEXT:    vsrl.vx v11, v8, s11
+; RV64-NEXT:    li ra, 40
+; RV64-NEXT:    vsrl.vx v12, v8, ra
+; RV64-NEXT:    addi t6, t6, -241
+; RV64-NEXT:    addi s2, s0, 819
+; RV64-NEXT:    addi s3, s1, 1365
+; RV64-NEXT:    slli s1, t6, 32
+; RV64-NEXT:    add s4, t6, s1
+; RV64-NEXT:    slli t6, s2, 32
+; RV64-NEXT:    add s2, s2, t6
+; RV64-NEXT:    slli t6, s3, 32
+; RV64-NEXT:    add s3, s3, t6
+; RV64-NEXT:    lui s0, 16
+; RV64-NEXT:    addi s1, s0, -256
+; RV64-NEXT:    lui a0, 4080
+; RV64-NEXT:    vand.vx v10, v10, a0
+; RV64-NEXT:    slli t6, t2, 24
+; RV64-NEXT:    vand.vx v13, v8, a0
+; RV64-NEXT:    vsll.vx v14, v8, s11
+; RV64-NEXT:    vand.vx v12, v12, s1
+; RV64-NEXT:    vand.vx v9, v9, t6
+; RV64-NEXT:    vsll.vi v13, v13, 24
+; RV64-NEXT:    vand.vx v15, v8, t6
+; RV64-NEXT:    vand.vx v8, v8, s1
+; RV64-NEXT:    vor.vv v11, v12, v11
+; RV64-NEXT:    vor.vv v9, v9, v10
+; RV64-NEXT:    vsll.vi v10, v15, 8
+; RV64-NEXT:    vsll.vx v8, v8, ra
+; RV64-NEXT:    vor.vv v9, v9, v11
+; RV64-NEXT:    vor.vv v10, v13, v10
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vand.vx v9, v9, s4
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vand.vx v9, v9, s2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, s3
+; RV64-NEXT:    vand.vx v9, v9, s3
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vand.vx v7, v8, s10
+; RV64-NEXT:    lui t2, 4096
+; RV64-NEXT:    vand.vx v6, v8, s9
+; RV64-NEXT:    lui s9, 8192
+; RV64-NEXT:    vand.vx v5, v8, s8
+; RV64-NEXT:    lui s8, 16384
+; RV64-NEXT:    vand.vx v4, v8, s5
+; RV64-NEXT:    lui s10, 32768
+; RV64-NEXT:    vand.vx v13, v8, s6
+; RV64-NEXT:    lui s11, 65536
+; RV64-NEXT:    vand.vx v14, v8, t5
+; RV64-NEXT:    lui t5, 131072
+; RV64-NEXT:    vand.vx v15, v8, t3
+; RV64-NEXT:    slli t3, t0, 11
+; RV64-NEXT:    vand.vx v16, v8, t3
+; RV64-NEXT:    lui t3, 262144
+; RV64-NEXT:    vand.vx v17, v8, s7
+; RV64-NEXT:    slli a0, t0, 31
+; RV64-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v18, v8, a1
+; RV64-NEXT:    slli a0, t0, 32
+; RV64-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v19, v8, t4
+; RV64-NEXT:    slli a0, t0, 33
+; RV64-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v20, v8, t1
+; RV64-NEXT:    slli a0, t0, 34
+; RV64-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v21, v8, s0
+; RV64-NEXT:    slli a0, t0, 35
+; RV64-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v22, v8, a7
+; RV64-NEXT:    slli a0, t0, 36
+; RV64-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v23, v8, a6
+; RV64-NEXT:    slli a0, t0, 37
+; RV64-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v24, v8, a5
+; RV64-NEXT:    slli a0, t0, 38
+; RV64-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v25, v8, a4
+; RV64-NEXT:    slli a0, t0, 39
+; RV64-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v26, v8, a3
+; RV64-NEXT:    slli a0, t0, 40
+; RV64-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v27, v8, a2
+; RV64-NEXT:    slli a0, t0, 41
+; RV64-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    lui a0, 2048
+; RV64-NEXT:    vand.vx v28, v8, a0
+; RV64-NEXT:    slli s5, t0, 42
+; RV64-NEXT:    vand.vx v29, v8, t2
+; RV64-NEXT:    slli s6, t0, 43
+; RV64-NEXT:    vand.vx v30, v8, s9
+; RV64-NEXT:    slli s7, t0, 44
+; RV64-NEXT:    vand.vx v10, v8, s8
+; RV64-NEXT:    slli s8, t0, 45
+; RV64-NEXT:    vand.vx v11, v8, s10
+; RV64-NEXT:    slli s9, t0, 46
+; RV64-NEXT:    vand.vx v12, v8, s11
+; RV64-NEXT:    slli s10, t0, 47
+; RV64-NEXT:    vand.vx v9, v8, t5
+; RV64-NEXT:    slli s11, t0, 48
+; RV64-NEXT:    vand.vx v31, v8, t3
+; RV64-NEXT:    slli ra, t0, 49
+; RV64-NEXT:    slli t5, t0, 50
+; RV64-NEXT:    slli t4, t0, 51
+; RV64-NEXT:    slli t3, t0, 52
+; RV64-NEXT:    slli t2, t0, 53
+; RV64-NEXT:    slli t1, t0, 54
+; RV64-NEXT:    slli a7, t0, 55
+; RV64-NEXT:    slli a6, t0, 56
+; RV64-NEXT:    slli a5, t0, 57
+; RV64-NEXT:    slli a4, t0, 58
+; RV64-NEXT:    slli a3, t0, 59
+; RV64-NEXT:    slli a2, t0, 60
+; RV64-NEXT:    slli a1, t0, 61
+; RV64-NEXT:    slli t0, t0, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v3, v8, 2
+; RV64-NEXT:    vand.vi v2, v8, 1
+; RV64-NEXT:    vand.vi v1, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v3, v8, v3
+; RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v3, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v3, v8, v2
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v3, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v3, v8, v1
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v3, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vmul.vv v7, v8, v7
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v7, v8, v6
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v7, v8, v5
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v7, v8, v4
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v13
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v14
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v15
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 4
+; RV64-NEXT:    add t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v16
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v17
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 4
+; RV64-NEXT:    sub t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v18
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v19
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v20
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v21
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v22
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v23
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 3
+; RV64-NEXT:    add t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v24
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v25
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 3
+; RV64-NEXT:    sub t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v26
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v27
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 2
+; RV64-NEXT:    add t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v28
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v29
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 1
+; RV64-NEXT:    add t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v13, v8, v30
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 4
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v10, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v11
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 5
+; RV64-NEXT:    add t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v10, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v12
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 5
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v10, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s0, t6, 5
+; RV64-NEXT:    sub t6, s0, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v31
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr s0, vlenb
+; RV64-NEXT:    slli s0, s0, 1
+; RV64-NEXT:    mv t6, s0
+; RV64-NEXT:    slli s0, s0, 2
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    slli s0, s0, 1
+; RV64-NEXT:    add s0, s0, t6
+; RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s0, sp, s0
+; RV64-NEXT:    addi s0, s0, 112
+; RV64-NEXT:    vs1r.v v9, (s0) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s0, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr s0, vlenb
+; RV64-NEXT:    add s0, sp, s0
+; RV64-NEXT:    addi s0, s0, 112
+; RV64-NEXT:    vs1r.v v9, (s0) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    addi s0, sp, 112
+; RV64-NEXT:    vs1r.v v9, (s0) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s0, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v4, v8, v9
+; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v5, v8, v9
+; RV64-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v6, v8, v9
+; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v7, v8, v9
+; RV64-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v31, v8, v9
+; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s0
+; RV64-NEXT:    vmul.vv v30, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s5
+; RV64-NEXT:    vmul.vv v29, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s6
+; RV64-NEXT:    vmul.vv v28, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s7
+; RV64-NEXT:    vmul.vv v27, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s8
+; RV64-NEXT:    vmul.vv v26, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s9
+; RV64-NEXT:    vmul.vv v25, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s10
+; RV64-NEXT:    vmul.vv v23, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s11
+; RV64-NEXT:    vmul.vv v19, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, ra
+; RV64-NEXT:    vmul.vv v14, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t5
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    vand.vx v10, v8, t4
+; RV64-NEXT:    vmul.vv v24, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t3
+; RV64-NEXT:    vmul.vv v22, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t2
+; RV64-NEXT:    vmul.vv v20, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t1
+; RV64-NEXT:    vmul.vv v15, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, a7
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vand.vx v11, v8, a6
+; RV64-NEXT:    vmul.vv v16, v8, v11
+; RV64-NEXT:    vand.vx v11, v8, a5
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vand.vx v12, v8, a4
+; RV64-NEXT:    vmul.vv v21, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, a3
+; RV64-NEXT:    vmul.vv v17, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vand.vx v13, v8, a1
+; RV64-NEXT:    vmul.vv v18, v8, v13
+; RV64-NEXT:    vand.vx v13, v8, t0
+; RV64-NEXT:    vmul.vv v13, v8, v13
+; RV64-NEXT:    vand.vx v2, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v1
+; RV64-NEXT:    vxor.vv v2, v2, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 4
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 2
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 1
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v1, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v3
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsll.vx v2, v2, a0
+; RV64-NEXT:    vand.vx v1, v1, s1
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    vsll.vx v1, v1, a1
+; RV64-NEXT:    vor.vv v2, v2, v1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v1, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v0, v1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a3, a2, 5
+; RV64-NEXT:    add a2, a3, a2
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a3, a2, 5
+; RV64-NEXT:    sub a2, a3, a2
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl1r.v v3, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v3
+; RV64-NEXT:    addi a2, sp, 112
+; RV64-NEXT:    vl1r.v v3, (a2) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v3, v1, v3
+; RV64-NEXT:    vxor.vv v4, v3, v4
+; RV64-NEXT:    vxor.vv v5, v4, v5
+; RV64-NEXT:    vxor.vv v6, v5, v6
+; RV64-NEXT:    vxor.vv v7, v6, v7
+; RV64-NEXT:    vxor.vv v31, v7, v31
+; RV64-NEXT:    vxor.vv v30, v31, v30
+; RV64-NEXT:    vxor.vv v29, v30, v29
+; RV64-NEXT:    vxor.vv v28, v29, v28
+; RV64-NEXT:    vxor.vv v27, v28, v27
+; RV64-NEXT:    vxor.vv v26, v27, v26
+; RV64-NEXT:    vxor.vv v25, v26, v25
+; RV64-NEXT:    vxor.vv v23, v25, v23
+; RV64-NEXT:    vxor.vv v19, v23, v19
+; RV64-NEXT:    vxor.vv v14, v19, v14
+; RV64-NEXT:    vxor.vv v9, v14, v9
+; RV64-NEXT:    vsrl.vi v14, v7, 8
+; RV64-NEXT:    vand.vx v14, v14, t6
+; RV64-NEXT:    vsrl.vi v19, v23, 24
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v19, v19, a2
+; RV64-NEXT:    vor.vv v14, v14, v19
+; RV64-NEXT:    vxor.vv v9, v9, v24
+; RV64-NEXT:    vxor.vv v9, v9, v22
+; RV64-NEXT:    vxor.vv v9, v9, v20
+; RV64-NEXT:    vxor.vv v9, v9, v15
+; RV64-NEXT:    vxor.vv v9, v9, v10
+; RV64-NEXT:    vand.vx v10, v7, a2
+; RV64-NEXT:    vsll.vi v10, v10, 24
+; RV64-NEXT:    vxor.vv v15, v9, v16
+; RV64-NEXT:    vxor.vv v11, v15, v11
+; RV64-NEXT:    vand.vx v15, v9, t6
+; RV64-NEXT:    vsll.vi v15, v15, 8
+; RV64-NEXT:    vor.vv v10, v10, v15
+; RV64-NEXT:    vxor.vv v11, v11, v21
+; RV64-NEXT:    vor.vv v10, v2, v10
+; RV64-NEXT:    vxor.vv v11, v11, v17
+; RV64-NEXT:    vxor.vv v11, v11, v12
+; RV64-NEXT:    vsrl.vx v9, v9, a1
+; RV64-NEXT:    vand.vx v9, v9, s1
+; RV64-NEXT:    vxor.vv v11, v11, v18
+; RV64-NEXT:    vxor.vv v11, v11, v13
+; RV64-NEXT:    vxor.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vand.vx v9, v9, s4
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vand.vx v9, v9, s2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, s3
+; RV64-NEXT:    vand.vx v9, v9, s3
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 224
+; RV64-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.clmulr.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y)
+  ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @clmulr_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s7, 1044480
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    li s8, 2
+; RV32-NEXT:    li s9, 4
+; RV32-NEXT:    li s10, 8
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    li a5, 64
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    li s11, 256
+; RV32-NEXT:    li ra, 512
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    lui t0, 2
+; RV32-NEXT:    lui t1, 4
+; RV32-NEXT:    lui t2, 8
+; RV32-NEXT:    lui t3, 16
+; RV32-NEXT:    lui t4, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t6, 128
+; RV32-NEXT:    lui s0, 256
+; RV32-NEXT:    lui s1, 512
+; RV32-NEXT:    lui s2, 1024
+; RV32-NEXT:    lui s3, 2048
+; RV32-NEXT:    lui s4, 4096
+; RV32-NEXT:    lui s5, 8192
+; RV32-NEXT:    lui s6, 16384
+; RV32-NEXT:    sw s7, 272(sp)
+; RV32-NEXT:    lui s7, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw a7, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a1, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw s8, 252(sp)
+; RV32-NEXT:    lui s8, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s9, 244(sp)
+; RV32-NEXT:    lui s9, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s10, 236(sp)
+; RV32-NEXT:    lui s10, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw a3, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw a4, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw a5, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw a6, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s11, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw ra, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a0, 180(sp)
+; RV32-NEXT:    slli a5, a1, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw a5, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a2, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw t0, 156(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw t1, 148(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw t2, 140(sp)
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t3, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t4, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t5, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t6, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s0, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s1, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s2, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s3, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s4, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s5, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s6, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s7, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw s8, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw s9, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw s10, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw a7, 12(sp)
+; RV32-NEXT:    lui a0, 61681
+; RV32-NEXT:    addi a0, a0, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v4, a0
+; RV32-NEXT:    lui a0, 209715
+; RV32-NEXT:    addi a0, a0, 819
+; RV32-NEXT:    vmv.v.x v2, a0
+; RV32-NEXT:    lui a0, 349525
+; RV32-NEXT:    addi a0, a0, 1365
+; RV32-NEXT:    vmv.v.x v0, a0
+; RV32-NEXT:    addi a0, sp, 272
+; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v6, (a0), zero
+; RV32-NEXT:    addi a0, sp, 264
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 256
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    addi a0, sp, 248
+; RV32-NEXT:    vlse64.v v14, (a0), zero
+; RV32-NEXT:    addi a0, sp, 240
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v18, (a0), zero
+; RV32-NEXT:    addi a0, sp, 224
+; RV32-NEXT:    vlse64.v v20, (a0), zero
+; RV32-NEXT:    addi a0, sp, 216
+; RV32-NEXT:    vlse64.v v22, (a0), zero
+; RV32-NEXT:    li ra, 56
+; RV32-NEXT:    vsrl.vi v24, v8, 24
+; RV32-NEXT:    vsrl.vx v26, v8, ra
+; RV32-NEXT:    li s11, 40
+; RV32-NEXT:    vsrl.vx v28, v8, s11
+; RV32-NEXT:    vsll.vx v30, v8, ra
+; RV32-NEXT:    addi a4, t3, -256
+; RV32-NEXT:    vand.vx v28, v28, a4
+; RV32-NEXT:    vor.vv v26, v28, v26
+; RV32-NEXT:    vand.vx v28, v8, a4
+; RV32-NEXT:    vsll.vx v28, v28, s11
+; RV32-NEXT:    vor.vv v30, v30, v28
+; RV32-NEXT:    vsrl.vi v28, v8, 8
+; RV32-NEXT:    lui a6, 4080
+; RV32-NEXT:    vand.vx v24, v24, a6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v28, v28, v6
+; RV32-NEXT:    vor.vv v28, v28, v24
+; RV32-NEXT:    addi a3, sp, 208
+; RV32-NEXT:    vlse64.v v24, (a3), zero
+; RV32-NEXT:    vor.vv v10, v28, v26
+; RV32-NEXT:    vand.vx v26, v8, a6
+; RV32-NEXT:    vsll.vi v26, v26, 24
+; RV32-NEXT:    vand.vv v8, v8, v6
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v26, v8
+; RV32-NEXT:    addi a3, sp, 200
+; RV32-NEXT:    vlse64.v v28, (a3), zero
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    addi a3, sp, 192
+; RV32-NEXT:    vlse64.v v26, (a3), zero
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v30, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v4
+; RV32-NEXT:    vand.vv v30, v30, v4
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    vsrl.vi v30, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v2, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v2
+; RV32-NEXT:    vand.vv v30, v30, v2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    vsrl.vi v30, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v0, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v30, v30, v0
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    addi a3, sp, 184
+; RV32-NEXT:    vlse64.v v30, (a3), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v6, v8, v10
+; RV32-NEXT:    vand.vv v4, v8, v12
+; RV32-NEXT:    vand.vv v2, v8, v14
+; RV32-NEXT:    vand.vv v0, v8, v16
+; RV32-NEXT:    vand.vv v10, v8, v18
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v22
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v28, v8, v28
+; RV32-NEXT:    addi a3, sp, 176
+; RV32-NEXT:    addi a0, sp, 168
+; RV32-NEXT:    vlse64.v v10, (a3), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vand.vv v14, v8, v26
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v14, v8, v30
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a2, sp, 160
+; RV32-NEXT:    addi a3, sp, 152
+; RV32-NEXT:    addi a1, sp, 144
+; RV32-NEXT:    addi a0, sp, 136
+; RV32-NEXT:    vlse64.v v10, (a2), zero
+; RV32-NEXT:    vlse64.v v12, (a3), zero
+; RV32-NEXT:    vlse64.v v14, (a1), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    addi a1, sp, 120
+; RV32-NEXT:    addi a2, sp, 112
+; RV32-NEXT:    addi a3, sp, 104
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    addi a1, sp, 88
+; RV32-NEXT:    addi a2, sp, 80
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    addi a1, sp, 56
+; RV32-NEXT:    addi a2, sp, 48
+; RV32-NEXT:    addi a3, sp, 40
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    addi a3, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 2
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 1
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 4
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 8
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t1
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t2
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t3
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t4
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s1
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s2
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s3
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s4
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s7
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s8
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s9
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s10
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v6
+; RV32-NEXT:    vmul.vv v14, v8, v4
+; RV32-NEXT:    vmul.vv v16, v8, v2
+; RV32-NEXT:    vmul.vv v18, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v20, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v22, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v26, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v30, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v14
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v18
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v22
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v26
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v30
+; RV32-NEXT:    vxor.vv v8, v8, v6
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v2
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vx v10, v8, ra
+; RV32-NEXT:    vsll.vx v12, v8, ra
+; RV32-NEXT:    vsrl.vx v14, v8, s11
+; RV32-NEXT:    vand.vx v16, v8, a4
+; RV32-NEXT:    vand.vx v14, v14, a4
+; RV32-NEXT:    vsrl.vi v18, v8, 24
+; RV32-NEXT:    vand.vx v20, v8, a6
+; RV32-NEXT:    vand.vx v18, v18, a6
+; RV32-NEXT:    vsll.vx v16, v16, s11
+; RV32-NEXT:    vsrl.vi v22, v8, 8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v22, v22, v24
+; RV32-NEXT:    vor.vv v10, v14, v10
+; RV32-NEXT:    vor.vv v14, v22, v18
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vsll.vi v18, v20, 24
+; RV32-NEXT:    vor.vv v8, v18, v8
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vor.vv v10, v14, v10
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -224
+; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li s3, 40
+; RV64-NEXT:    lui s1, 16
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vsrl.vi v14, v8, 24
+; RV64-NEXT:    vsrl.vi v10, v8, 8
+; RV64-NEXT:    li t4, 255
+; RV64-NEXT:    lui a5, 61681
+; RV64-NEXT:    lui a6, 209715
+; RV64-NEXT:    lui t6, 349525
+; RV64-NEXT:    li t5, 16
+; RV64-NEXT:    li t3, 32
+; RV64-NEXT:    li t2, 64
+; RV64-NEXT:    li t0, 128
+; RV64-NEXT:    li t1, 256
+; RV64-NEXT:    li a4, 512
+; RV64-NEXT:    li a3, 1024
+; RV64-NEXT:    li s0, 1
+; RV64-NEXT:    lui a2, 1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    lui a0, 4
+; RV64-NEXT:    li a7, 56
+; RV64-NEXT:    vsrl.vx v12, v8, a7
+; RV64-NEXT:    vsrl.vx v18, v8, s3
+; RV64-NEXT:    addi s2, s1, -256
+; RV64-NEXT:    lui s1, 4080
+; RV64-NEXT:    vand.vx v16, v14, s1
+; RV64-NEXT:    slli t4, t4, 24
+; RV64-NEXT:    vand.vx v20, v8, s1
+; RV64-NEXT:    vsll.vx v14, v8, a7
+; RV64-NEXT:    addi a7, a5, -241
+; RV64-NEXT:    addi a6, a6, 819
+; RV64-NEXT:    addi a5, t6, 1365
+; RV64-NEXT:    slli t6, s0, 11
+; RV64-NEXT:    slli s1, s0, 31
+; RV64-NEXT:    sd s1, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s1, s0, 32
+; RV64-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s1, s0, 33
+; RV64-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s1, s0, 34
+; RV64-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s1, s0, 35
+; RV64-NEXT:    sd s1, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s1, s0, 36
+; RV64-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s1, a7, 32
+; RV64-NEXT:    add a7, a7, s1
+; RV64-NEXT:    slli s1, a6, 32
+; RV64-NEXT:    add a6, a6, s1
+; RV64-NEXT:    slli s1, a5, 32
+; RV64-NEXT:    add a5, a5, s1
+; RV64-NEXT:    slli s1, s0, 37
+; RV64-NEXT:    sd s1, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v18, v18, s2
+; RV64-NEXT:    vand.vx v10, v10, t4
+; RV64-NEXT:    vsll.vi v20, v20, 24
+; RV64-NEXT:    vand.vx v22, v8, t4
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vor.vv v12, v18, v12
+; RV64-NEXT:    vor.vv v10, v10, v16
+; RV64-NEXT:    vsll.vi v16, v22, 8
+; RV64-NEXT:    vsll.vx v8, v8, s3
+; RV64-NEXT:    vor.vv v10, v10, v12
+; RV64-NEXT:    vor.vv v12, v20, v16
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v10, v10, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v10, v10, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v10, v10, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vand.vx v10, v8, t5
+; RV64-NEXT:    slli t5, s0, 38
+; RV64-NEXT:    sd t5, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    slli t3, s0, 39
+; RV64-NEXT:    sd t3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v14, v8, t2
+; RV64-NEXT:    slli t2, s0, 40
+; RV64-NEXT:    sd t2, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v24, v8, t0
+; RV64-NEXT:    slli t0, s0, 41
+; RV64-NEXT:    sd t0, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t1
+; RV64-NEXT:    slli s6, s0, 42
+; RV64-NEXT:    vand.vx v18, v8, a4
+; RV64-NEXT:    slli s7, s0, 43
+; RV64-NEXT:    vand.vx v20, v8, a3
+; RV64-NEXT:    slli s8, s0, 44
+; RV64-NEXT:    vand.vx v22, v8, t6
+; RV64-NEXT:    slli s9, s0, 45
+; RV64-NEXT:    vand.vx v26, v8, a2
+; RV64-NEXT:    slli s10, s0, 46
+; RV64-NEXT:    vand.vx v28, v8, a1
+; RV64-NEXT:    slli s11, s0, 47
+; RV64-NEXT:    vand.vx v30, v8, a0
+; RV64-NEXT:    slli ra, s0, 48
+; RV64-NEXT:    slli s4, s0, 49
+; RV64-NEXT:    slli s3, s0, 50
+; RV64-NEXT:    slli s1, s0, 51
+; RV64-NEXT:    slli t6, s0, 52
+; RV64-NEXT:    slli t5, s0, 53
+; RV64-NEXT:    slli t3, s0, 54
+; RV64-NEXT:    slli t2, s0, 55
+; RV64-NEXT:    slli t1, s0, 56
+; RV64-NEXT:    slli t0, s0, 57
+; RV64-NEXT:    slli a4, s0, 58
+; RV64-NEXT:    slli a3, s0, 59
+; RV64-NEXT:    slli a2, s0, 60
+; RV64-NEXT:    slli a1, s0, 61
+; RV64-NEXT:    slli s0, s0, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v6, v8, 2
+; RV64-NEXT:    vand.vi v4, v8, 1
+; RV64-NEXT:    vand.vi v2, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v6, v8, v6
+; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v4
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v2
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v14
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v24
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v16
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v18
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v20
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v22
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v26
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v28
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v30
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 8
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 16
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 32
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 64
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 128
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 256
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 512
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 1024
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 2048
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 8192
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 16384
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 32768
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 65536
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 131072
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 262144
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv a5, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s6
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s7
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s8
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s9
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s10
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s11
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, ra
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s4
+; RV64-NEXT:    vmul.vv v20, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s3
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vand.vx v12, v8, s1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s1, vlenb
+; RV64-NEXT:    slli s1, s1, 3
+; RV64-NEXT:    add s1, sp, s1
+; RV64-NEXT:    addi s1, s1, 112
+; RV64-NEXT:    vs2r.v v12, (s1) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs2r.v v12, (t6) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t5
+; RV64-NEXT:    vmul.vv v6, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    vmul.vv v22, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, t2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vand.vx v14, v8, t1
+; RV64-NEXT:    vmul.vv v24, v8, v14
+; RV64-NEXT:    vand.vx v14, v8, t0
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vand.vx v16, v8, a4
+; RV64-NEXT:    vmul.vv v4, v8, v16
+; RV64-NEXT:    vand.vx v16, v8, a3
+; RV64-NEXT:    vmul.vv v2, v8, v16
+; RV64-NEXT:    vand.vx v16, v8, a2
+; RV64-NEXT:    vmul.vv v26, v8, v16
+; RV64-NEXT:    vand.vx v16, v8, a1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    addi a1, sp, 112
+; RV64-NEXT:    vs2r.v v16, (a1) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s0
+; RV64-NEXT:    vmul.vv v18, v8, v16
+; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v28, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v28
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v0, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v30, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsll.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v0, v0, s2
+; RV64-NEXT:    li a0, 40
+; RV64-NEXT:    vsll.vx v0, v0, a0
+; RV64-NEXT:    vor.vv v8, v8, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v0, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v28, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v28, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v30
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 112
+; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v30, v0, v30
+; RV64-NEXT:    vxor.vv v20, v30, v20
+; RV64-NEXT:    vxor.vv v10, v20, v10
+; RV64-NEXT:    vsrl.vi v20, v28, 8
+; RV64-NEXT:    vand.vx v20, v20, t4
+; RV64-NEXT:    vsrl.vi v30, v0, 24
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v30, v30, a2
+; RV64-NEXT:    vor.vv v20, v20, v30
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 112
+; RV64-NEXT:    vl2r.v v30, (a3) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v10, v10, v30
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 112
+; RV64-NEXT:    vl2r.v v30, (a3) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v10, v10, v30
+; RV64-NEXT:    vxor.vv v10, v10, v6
+; RV64-NEXT:    vxor.vv v10, v10, v22
+; RV64-NEXT:    vxor.vv v10, v10, v12
+; RV64-NEXT:    vand.vx v12, v28, a2
+; RV64-NEXT:    vsll.vi v12, v12, 24
+; RV64-NEXT:    vxor.vv v22, v10, v24
+; RV64-NEXT:    vxor.vv v14, v22, v14
+; RV64-NEXT:    vand.vx v22, v10, t4
+; RV64-NEXT:    vsll.vi v22, v22, 8
+; RV64-NEXT:    vor.vv v12, v12, v22
+; RV64-NEXT:    vxor.vv v14, v14, v4
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vxor.vv v12, v14, v2
+; RV64-NEXT:    vxor.vv v12, v12, v26
+; RV64-NEXT:    vsrl.vx v10, v10, a0
+; RV64-NEXT:    vand.vx v10, v10, s2
+; RV64-NEXT:    addi a0, sp, 112
+; RV64-NEXT:    vl2r.v v14, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vxor.vv v12, v12, v18
+; RV64-NEXT:    vxor.vv v12, v12, v16
+; RV64-NEXT:    vsrl.vx v12, v12, a1
+; RV64-NEXT:    vor.vv v10, v10, v12
+; RV64-NEXT:    vor.vv v10, v20, v10
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v10, v10, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v10, v10, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v10, v10, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 224
+; RV64-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.clmulr.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @clmulr_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s11, 1044480
+; RV32-NEXT:    lui t6, 524288
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    li ra, 2
+; RV32-NEXT:    li t4, 4
+; RV32-NEXT:    li t2, 8
+; RV32-NEXT:    li t5, 16
+; RV32-NEXT:    li t3, 32
+; RV32-NEXT:    li t1, 64
+; RV32-NEXT:    li t0, 128
+; RV32-NEXT:    li a7, 256
+; RV32-NEXT:    li a6, 512
+; RV32-NEXT:    li a3, 1024
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    lui a4, 2
+; RV32-NEXT:    lui a1, 4
+; RV32-NEXT:    lui a5, 8
+; RV32-NEXT:    lui s0, 16
+; RV32-NEXT:    lui s1, 32
+; RV32-NEXT:    lui s2, 64
+; RV32-NEXT:    lui s3, 128
+; RV32-NEXT:    lui s4, 256
+; RV32-NEXT:    lui s5, 512
+; RV32-NEXT:    lui s6, 1024
+; RV32-NEXT:    lui s7, 2048
+; RV32-NEXT:    lui s8, 4096
+; RV32-NEXT:    lui s9, 8192
+; RV32-NEXT:    lui s10, 16384
+; RV32-NEXT:    sw s11, 272(sp)
+; RV32-NEXT:    lui s11, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw t6, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a0, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw ra, 252(sp)
+; RV32-NEXT:    lui ra, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw t4, 244(sp)
+; RV32-NEXT:    lui t4, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw t2, 236(sp)
+; RV32-NEXT:    lui t2, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw t5, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw t3, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw t1, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw t0, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw a7, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw a6, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a3, 180(sp)
+; RV32-NEXT:    li t1, 1024
+; RV32-NEXT:    slli a3, a0, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw a3, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a2, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw a4, 156(sp)
+; RV32-NEXT:    lui t3, 2
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw a1, 148(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw a5, 140(sp)
+; RV32-NEXT:    lui t5, 8
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s0, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw s1, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw s2, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw s3, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s4, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s5, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s6, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s7, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s8, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s9, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s10, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s11, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw ra, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw t4, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw t2, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw t6, 12(sp)
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v28, a1
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vmv.v.x v4, a1
+; RV32-NEXT:    addi a1, sp, 272
+; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v0, (a1), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v0, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a6, 56
+; RV32-NEXT:    vsrl.vi v20, v8, 24
+; RV32-NEXT:    vsrl.vx v12, v8, a6
+; RV32-NEXT:    li a5, 40
+; RV32-NEXT:    vsrl.vx v16, v8, a5
+; RV32-NEXT:    vsll.vx v24, v8, a6
+; RV32-NEXT:    addi a2, s0, -256
+; RV32-NEXT:    vand.vx v16, v16, a2
+; RV32-NEXT:    vor.vv v16, v16, v12
+; RV32-NEXT:    vand.vx v12, v8, a2
+; RV32-NEXT:    vsll.vx v12, v12, a5
+; RV32-NEXT:    vor.vv v12, v24, v12
+; RV32-NEXT:    vsrl.vi v24, v8, 8
+; RV32-NEXT:    lui a4, 4080
+; RV32-NEXT:    vand.vx v20, v20, a4
+; RV32-NEXT:    lui a7, 349525
+; RV32-NEXT:    addi a7, a7, 1365
+; RV32-NEXT:    vand.vv v24, v24, v0
+; RV32-NEXT:    vor.vv v20, v24, v20
+; RV32-NEXT:    vsetvli t0, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a7
+; RV32-NEXT:    vsetvli a7, zero, e64, m4, ta, ma
+; RV32-NEXT:    vor.vv v16, v20, v16
+; RV32-NEXT:    vand.vx v20, v8, a4
+; RV32-NEXT:    vsll.vi v20, v20, 24
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v20, v8
+; RV32-NEXT:    addi a7, sp, 264
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    addi a7, sp, 256
+; RV32-NEXT:    vlse64.v v12, (a7), zero
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v28
+; RV32-NEXT:    vand.vv v16, v16, v28
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v4
+; RV32-NEXT:    vand.vv v16, v16, v4
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    addi a7, sp, 248
+; RV32-NEXT:    vlse64.v v16, (a7), zero
+; RV32-NEXT:    vand.vv v28, v8, v20
+; RV32-NEXT:    addi a7, sp, 240
+; RV32-NEXT:    addi t0, sp, 232
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v4, v8, v12
+; RV32-NEXT:    vand.vv v0, v8, v16
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a7, sp, 224
+; RV32-NEXT:    addi t0, sp, 216
+; RV32-NEXT:    addi a1, sp, 208
+; RV32-NEXT:    addi a0, sp, 200
+; RV32-NEXT:    vlse64.v v12, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    vlse64.v v20, (a1), zero
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    addi a1, sp, 184
+; RV32-NEXT:    addi a7, sp, 176
+; RV32-NEXT:    addi t0, sp, 168
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 160
+; RV32-NEXT:    addi a1, sp, 152
+; RV32-NEXT:    addi a7, sp, 144
+; RV32-NEXT:    addi t0, sp, 136
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    addi a1, sp, 120
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    addi t0, sp, 104
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    addi a1, sp, 88
+; RV32-NEXT:    addi a7, sp, 80
+; RV32-NEXT:    addi t0, sp, 72
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    addi a1, sp, 56
+; RV32-NEXT:    addi a7, sp, 48
+; RV32-NEXT:    addi t0, sp, 40
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    addi a7, sp, 16
+; RV32-NEXT:    addi t0, sp, 8
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 8
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, a3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    lui a0, 4
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s7
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s8
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s9
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s10
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s11
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, ra
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v16, v8, v28
+; RV32-NEXT:    vmul.vv v20, v8, v4
+; RV32-NEXT:    vmul.vv v24, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v28, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vx v12, v8, a6
+; RV32-NEXT:    vsrl.vx v16, v8, a5
+; RV32-NEXT:    vsrl.vi v20, v8, 24
+; RV32-NEXT:    vand.vx v16, v16, a2
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    vand.vx v20, v20, a4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vor.vv v16, v16, v20
+; RV32-NEXT:    vand.vx v20, v8, a4
+; RV32-NEXT:    vand.vv v24, v8, v24
+; RV32-NEXT:    vsll.vi v24, v24, 8
+; RV32-NEXT:    vsll.vi v20, v20, 24
+; RV32-NEXT:    vor.vv v20, v20, v24
+; RV32-NEXT:    vsll.vx v24, v8, a6
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vsll.vx v8, v8, a5
+; RV32-NEXT:    vor.vv v8, v24, v8
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vor.vv v8, v8, v20
+; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -240
+; RV64-NEXT:    sd ra, 232(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 224(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li t0, 40
+; RV64-NEXT:    lui a7, 16
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vsrl.vi v20, v8, 24
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    li t2, 255
+; RV64-NEXT:    lui a3, 61681
+; RV64-NEXT:    lui a4, 209715
+; RV64-NEXT:    lui a5, 349525
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    li s9, 1
+; RV64-NEXT:    li a6, 56
+; RV64-NEXT:    vsrl.vx v16, v8, a6
+; RV64-NEXT:    vsrl.vx v28, v8, t0
+; RV64-NEXT:    addi t6, a7, -256
+; RV64-NEXT:    lui a7, 4080
+; RV64-NEXT:    vand.vx v24, v20, a7
+; RV64-NEXT:    slli t2, t2, 24
+; RV64-NEXT:    vand.vx v4, v8, a7
+; RV64-NEXT:    vsll.vx v20, v8, a6
+; RV64-NEXT:    addi a7, a3, -241
+; RV64-NEXT:    addi a6, a4, 819
+; RV64-NEXT:    addi a5, a5, 1365
+; RV64-NEXT:    slli a3, s9, 11
+; RV64-NEXT:    sd a3, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 31
+; RV64-NEXT:    sd a3, 104(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 32
+; RV64-NEXT:    sd a3, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 33
+; RV64-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 34
+; RV64-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 35
+; RV64-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 36
+; RV64-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 37
+; RV64-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 38
+; RV64-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 39
+; RV64-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 40
+; RV64-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 41
+; RV64-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s6, s9, 42
+; RV64-NEXT:    slli s7, s9, 43
+; RV64-NEXT:    slli a3, a7, 32
+; RV64-NEXT:    add a7, a7, a3
+; RV64-NEXT:    slli a3, a6, 32
+; RV64-NEXT:    add a6, a6, a3
+; RV64-NEXT:    slli a3, a5, 32
+; RV64-NEXT:    add a5, a5, a3
+; RV64-NEXT:    slli s8, s9, 44
+; RV64-NEXT:    vand.vx v28, v28, t6
+; RV64-NEXT:    vand.vx v12, v12, t2
+; RV64-NEXT:    vsll.vi v4, v4, 24
+; RV64-NEXT:    vand.vx v0, v8, t2
+; RV64-NEXT:    vand.vx v8, v8, t6
+; RV64-NEXT:    vor.vv v16, v28, v16
+; RV64-NEXT:    vor.vv v12, v12, v24
+; RV64-NEXT:    vsll.vi v24, v0, 8
+; RV64-NEXT:    vsll.vx v8, v8, t0
+; RV64-NEXT:    vor.vv v12, v12, v16
+; RV64-NEXT:    vor.vv v16, v4, v24
+; RV64-NEXT:    vor.vv v8, v20, v8
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v12, v12, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v12, v12, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v12, v12, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    slli s10, s9, 45
+; RV64-NEXT:    vand.vx v16, v8, a1
+; RV64-NEXT:    slli s11, s9, 46
+; RV64-NEXT:    vand.vx v20, v8, a0
+; RV64-NEXT:    slli ra, s9, 47
+; RV64-NEXT:    slli s4, s9, 48
+; RV64-NEXT:    slli s3, s9, 49
+; RV64-NEXT:    slli s2, s9, 50
+; RV64-NEXT:    slli s1, s9, 51
+; RV64-NEXT:    slli s0, s9, 52
+; RV64-NEXT:    slli t5, s9, 53
+; RV64-NEXT:    slli t4, s9, 54
+; RV64-NEXT:    slli t3, s9, 55
+; RV64-NEXT:    slli t1, s9, 56
+; RV64-NEXT:    slli t0, s9, 57
+; RV64-NEXT:    slli a4, s9, 58
+; RV64-NEXT:    slli a3, s9, 59
+; RV64-NEXT:    slli a2, s9, 60
+; RV64-NEXT:    slli a1, s9, 61
+; RV64-NEXT:    slli s9, s9, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v24, v8, 2
+; RV64-NEXT:    vand.vi v28, v8, 1
+; RV64-NEXT:    vand.vi v4, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    vmul.vv v28, v8, v28
+; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v28, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v28, v8, v4
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v28, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v16
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v20
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 128
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 256
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 512
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 1024
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 1
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 6
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 2
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 4
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 8
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 16
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 32
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 64
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 128
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 256
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 512
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 1024
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 2048
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 8192
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 16384
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 32768
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 65536
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 7
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 131072
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 262144
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv a5, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s7
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s8
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s10
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s11
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, ra
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    slli s4, s4, 3
+; RV64-NEXT:    mv s5, s4
+; RV64-NEXT:    slli s4, s4, 2
+; RV64-NEXT:    add s4, s4, s5
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 128
+; RV64-NEXT:    vs4r.v v12, (s4) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s3, vlenb
+; RV64-NEXT:    slli s3, s3, 2
+; RV64-NEXT:    mv s4, s3
+; RV64-NEXT:    slli s3, s3, 2
+; RV64-NEXT:    add s3, s3, s4
+; RV64-NEXT:    add s3, sp, s3
+; RV64-NEXT:    addi s3, s3, 128
+; RV64-NEXT:    vs4r.v v12, (s3) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s2
+; RV64-NEXT:    vmul.vv v4, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, s1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s1, vlenb
+; RV64-NEXT:    slli s1, s1, 2
+; RV64-NEXT:    mv s2, s1
+; RV64-NEXT:    slli s1, s1, 2
+; RV64-NEXT:    add s2, s2, s1
+; RV64-NEXT:    slli s1, s1, 1
+; RV64-NEXT:    add s1, s1, s2
+; RV64-NEXT:    add s1, sp, s1
+; RV64-NEXT:    addi s1, s1, 128
+; RV64-NEXT:    vs4r.v v12, (s1) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s0, vlenb
+; RV64-NEXT:    slli s0, s0, 2
+; RV64-NEXT:    mv s1, s0
+; RV64-NEXT:    slli s0, s0, 1
+; RV64-NEXT:    add s1, s1, s0
+; RV64-NEXT:    slli s0, s0, 2
+; RV64-NEXT:    add s0, s0, s1
+; RV64-NEXT:    add s0, sp, s0
+; RV64-NEXT:    addi s0, s0, 128
+; RV64-NEXT:    vs4r.v v12, (s0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t5, vlenb
+; RV64-NEXT:    slli t5, t5, 2
+; RV64-NEXT:    mv s0, t5
+; RV64-NEXT:    slli t5, t5, 3
+; RV64-NEXT:    add t5, t5, s0
+; RV64-NEXT:    add t5, sp, t5
+; RV64-NEXT:    addi t5, t5, 128
+; RV64-NEXT:    vs4r.v v12, (t5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t4, vlenb
+; RV64-NEXT:    slli t4, t4, 4
+; RV64-NEXT:    add t4, sp, t4
+; RV64-NEXT:    addi t4, t4, 128
+; RV64-NEXT:    vs4r.v v12, (t4) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t3, vlenb
+; RV64-NEXT:    slli t3, t3, 2
+; RV64-NEXT:    add t3, sp, t3
+; RV64-NEXT:    addi t3, t3, 128
+; RV64-NEXT:    vs4r.v v12, (t3) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t1
+; RV64-NEXT:    vmul.vv v20, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, t0
+; RV64-NEXT:    vmul.vv v16, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, a4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 128
+; RV64-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    mv a4, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a4, a4, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vs4r.v v12, (a3) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vs4r.v v12, (a2) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv a2, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s9
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v12
+; RV64-NEXT:    addi a0, sp, 128
+; RV64-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v12
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v12, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsll.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v12, v12, t6
+; RV64-NEXT:    li a0, 40
+; RV64-NEXT:    vsll.vx v12, v12, a0
+; RV64-NEXT:    vor.vv v12, v8, v12
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v8, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v0, v8
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 7
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 6
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v28, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v28
+; RV64-NEXT:    vxor.vv v24, v24, v4
+; RV64-NEXT:    vsrl.vi v4, v8, 8
+; RV64-NEXT:    vand.vx v4, v4, t2
+; RV64-NEXT:    vsrl.vi v0, v0, 24
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v0, v0, a2
+; RV64-NEXT:    vor.vv v4, v4, v0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    mv a4, a3
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    add a4, a4, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl4r.v v0, (a3) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    mv a4, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a4, a4, a3
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    add a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl4r.v v0, (a3) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    mv a4, a3
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl4r.v v0, (a3) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl4r.v v28, (a3) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v28
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl4r.v v28, (a3) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v28
+; RV64-NEXT:    vxor.vv v20, v24, v20
+; RV64-NEXT:    vxor.vv v16, v20, v16
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vsll.vi v8, v8, 24
+; RV64-NEXT:    vand.vx v20, v24, t2
+; RV64-NEXT:    vsll.vi v20, v20, 8
+; RV64-NEXT:    vor.vv v8, v8, v20
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v20, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v12, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v16, v12
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl4r.v v16, (a2) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v16
+; RV64-NEXT:    vsrl.vx v16, v24, a0
+; RV64-NEXT:    vand.vx v16, v16, t6
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a2, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v20
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v20
+; RV64-NEXT:    addi a0, sp, 128
+; RV64-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v12, v12, v20
+; RV64-NEXT:    vsrl.vx v12, v12, a1
+; RV64-NEXT:    vor.vv v12, v16, v12
+; RV64-NEXT:    vor.vv v12, v4, v12
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v12, v12, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v12, v12, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v12, v12, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 232(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 224(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 240
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x i64> @llvm.clmulr.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
+  ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @clmulr_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s11, 1044480
+; RV32-NEXT:    lui s0, 524288
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    li ra, 2
+; RV32-NEXT:    li t4, 4
+; RV32-NEXT:    li t2, 8
+; RV32-NEXT:    li t6, 16
+; RV32-NEXT:    li t5, 32
+; RV32-NEXT:    li t3, 64
+; RV32-NEXT:    li t1, 128
+; RV32-NEXT:    li t0, 256
+; RV32-NEXT:    li a7, 512
+; RV32-NEXT:    li a6, 1024
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    lui a3, 2
+; RV32-NEXT:    lui a2, 4
+; RV32-NEXT:    lui a5, 8
+; RV32-NEXT:    lui s1, 16
+; RV32-NEXT:    lui a1, 32
+; RV32-NEXT:    lui s2, 64
+; RV32-NEXT:    lui s3, 128
+; RV32-NEXT:    lui s4, 256
+; RV32-NEXT:    lui s5, 512
+; RV32-NEXT:    lui s6, 1024
+; RV32-NEXT:    lui s7, 2048
+; RV32-NEXT:    lui s8, 4096
+; RV32-NEXT:    lui s9, 8192
+; RV32-NEXT:    lui s10, 16384
+; RV32-NEXT:    sw s11, 272(sp)
+; RV32-NEXT:    lui s11, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw s0, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a0, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw ra, 252(sp)
+; RV32-NEXT:    lui ra, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw t4, 244(sp)
+; RV32-NEXT:    lui t4, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw t2, 236(sp)
+; RV32-NEXT:    lui t2, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw t6, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw t5, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw t3, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw t1, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw t0, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw a7, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a6, 180(sp)
+; RV32-NEXT:    li t1, 1024
+; RV32-NEXT:    slli t6, a0, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw t6, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a4, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw a3, 156(sp)
+; RV32-NEXT:    lui t3, 2
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw a2, 148(sp)
+; RV32-NEXT:    lui t5, 4
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw a5, 140(sp)
+; RV32-NEXT:    lui a4, 8
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s1, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw a1, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw s2, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw s3, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s4, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s5, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s6, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s7, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s8, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s9, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s10, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s11, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw ra, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw t4, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw t2, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw s0, 12(sp)
+; RV32-NEXT:    li a6, 56
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vx v16, v8, a6
+; RV32-NEXT:    li a5, 40
+; RV32-NEXT:    vsrl.vx v24, v8, a5
+; RV32-NEXT:    vsll.vx v0, v8, a6
+; RV32-NEXT:    addi a2, s1, -256
+; RV32-NEXT:    vand.vx v24, v24, a2
+; RV32-NEXT:    vor.vv v16, v24, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v24, v8, a2
+; RV32-NEXT:    vsll.vx v24, v24, a5
+; RV32-NEXT:    vor.vv v16, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a3, sp, 272
+; RV32-NEXT:    vlse64.v v24, (a3), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui a3, 4080
+; RV32-NEXT:    vsrl.vi v0, v8, 24
+; RV32-NEXT:    vand.vx v16, v0, a3
+; RV32-NEXT:    vsrl.vi v24, v8, 8
+; RV32-NEXT:    vmv8r.v v0, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v24, v8
+; RV32-NEXT:    vor.vv v24, v24, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vor.vv v16, v24, v8
+; RV32-NEXT:    vand.vx v24, v0, a3
+; RV32-NEXT:    vsll.vi v24, v24, 24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    vsll.vi v0, v0, 8
+; RV32-NEXT:    vor.vv v24, v24, v0
+; RV32-NEXT:    lui a7, 61681
+; RV32-NEXT:    addi a7, a7, -241
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsetvli t0, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a7
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui a7, 209715
+; RV32-NEXT:    addi a7, a7, 819
+; RV32-NEXT:    vsetvli t0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsetvli t0, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a7
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vsetvli a7, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v16, v16, v8
+; RV32-NEXT:    lui a7, 349525
+; RV32-NEXT:    addi a7, a7, 1365
+; RV32-NEXT:    vsetvli t0, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a7
+; RV32-NEXT:    vsetvli a7, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v8, v16, 1
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vmv8r.v v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 9
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a7, sp, 264
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v16, v16, v16
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a7, sp, 256
+; RV32-NEXT:    addi t0, sp, 248
+; RV32-NEXT:    addi a1, sp, 240
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v16, (a7), zero
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 4
+; RV32-NEXT:    mv s0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add s0, s0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add s0, s0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add s0, s0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add a7, a7, s0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (t0), zero
+; RV32-NEXT:    vlse64.v v0, (a1), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 224
+; RV32-NEXT:    addi a1, sp, 216
+; RV32-NEXT:    addi a7, sp, 208
+; RV32-NEXT:    addi t0, sp, 200
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    addi a1, sp, 184
+; RV32-NEXT:    addi a7, sp, 176
+; RV32-NEXT:    addi t0, sp, 168
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 160
+; RV32-NEXT:    addi a1, sp, 152
+; RV32-NEXT:    addi a7, sp, 144
+; RV32-NEXT:    addi t0, sp, 136
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    addi a1, sp, 120
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    addi t0, sp, 104
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    addi a1, sp, 88
+; RV32-NEXT:    addi a7, sp, 80
+; RV32-NEXT:    addi t0, sp, 72
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    addi a1, sp, 56
+; RV32-NEXT:    addi a7, sp, 48
+; RV32-NEXT:    addi t0, sp, 40
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    addi a7, sp, 16
+; RV32-NEXT:    addi t0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vlse64.v v0, (a7), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vi v16, v8, 2
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vi v16, v8, 1
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vi v16, v8, 4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vi v16, v8, 8
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t1
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t6
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t3
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t5
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, a4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s1
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    lui a0, 32
+; RV32-NEXT:    vand.vx v16, v8, a0
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s2
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s3
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s5
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s6
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s7
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s8
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s9
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s10
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, s11
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, ra
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t4
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vand.vx v16, v8, t2
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vx v16, v8, a5
+; RV32-NEXT:    vand.vx v16, v16, a2
+; RV32-NEXT:    vsrl.vx v24, v8, a6
+; RV32-NEXT:    vor.vv v16, v16, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vsrl.vi v24, v8, 24
+; RV32-NEXT:    vand.vx v24, v24, a3
+; RV32-NEXT:    vsrl.vi v0, v8, 8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v16
+; RV32-NEXT:    vor.vv v24, v0, v24
+; RV32-NEXT:    vand.vv v0, v8, v16
+; RV32-NEXT:    vsll.vi v0, v0, 8
+; RV32-NEXT:    vand.vx v16, v8, a3
+; RV32-NEXT:    vsll.vi v16, v16, 24
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsll.vx v0, v8, a6
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vsll.vx v8, v8, a5
+; RV32-NEXT:    vor.vv v8, v0, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vor.vv v24, v24, v0
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 9
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -240
+; RV64-NEXT:    sd ra, 232(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 224(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    lui a3, 16
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsrl.vi v24, v8, 24
+; RV64-NEXT:    vsrl.vx v16, v8, a1
+; RV64-NEXT:    li a5, 56
+; RV64-NEXT:    vsrl.vx v0, v8, a2
+; RV64-NEXT:    li s5, 40
+; RV64-NEXT:    addi s4, a3, -256
+; RV64-NEXT:    vand.vx v0, v0, s4
+; RV64-NEXT:    vor.vv v16, v0, v16
+; RV64-NEXT:    vsrl.vi v0, v8, 8
+; RV64-NEXT:    li a4, 255
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    lui a2, 209715
+; RV64-NEXT:    lui a3, 349525
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    lui a6, 4080
+; RV64-NEXT:    vand.vx v24, v24, a6
+; RV64-NEXT:    slli a4, a4, 24
+; RV64-NEXT:    vand.vx v0, v0, a4
+; RV64-NEXT:    vor.vv v24, v0, v24
+; RV64-NEXT:    vand.vx v0, v8, a6
+; RV64-NEXT:    vsll.vi v0, v0, 24
+; RV64-NEXT:    vor.vv v16, v24, v16
+; RV64-NEXT:    vand.vx v24, v8, a4
+; RV64-NEXT:    vsll.vi v24, v24, 8
+; RV64-NEXT:    vor.vv v24, v0, v24
+; RV64-NEXT:    vsll.vx v0, v8, a5
+; RV64-NEXT:    addi a7, a1, -241
+; RV64-NEXT:    addi a6, a2, 819
+; RV64-NEXT:    addi a5, a3, 1365
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s6, a0, 42
+; RV64-NEXT:    slli s7, a0, 43
+; RV64-NEXT:    slli s8, a0, 44
+; RV64-NEXT:    slli s9, a0, 45
+; RV64-NEXT:    slli s10, a0, 46
+; RV64-NEXT:    slli a1, a7, 32
+; RV64-NEXT:    add a7, a7, a1
+; RV64-NEXT:    slli a1, a6, 32
+; RV64-NEXT:    add a6, a6, a1
+; RV64-NEXT:    slli a1, a5, 32
+; RV64-NEXT:    add a5, a5, a1
+; RV64-NEXT:    slli s11, a0, 47
+; RV64-NEXT:    slli ra, a0, 48
+; RV64-NEXT:    slli s3, a0, 49
+; RV64-NEXT:    slli s2, a0, 50
+; RV64-NEXT:    slli s1, a0, 51
+; RV64-NEXT:    slli s0, a0, 52
+; RV64-NEXT:    slli t6, a0, 53
+; RV64-NEXT:    slli t5, a0, 54
+; RV64-NEXT:    slli t4, a0, 55
+; RV64-NEXT:    slli t3, a0, 56
+; RV64-NEXT:    slli t2, a0, 57
+; RV64-NEXT:    slli t1, a0, 58
+; RV64-NEXT:    slli t0, a0, 59
+; RV64-NEXT:    slli a3, a0, 60
+; RV64-NEXT:    slli a2, a0, 61
+; RV64-NEXT:    slli a1, a0, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vsll.vx v8, v8, s5
+; RV64-NEXT:    vor.vv v8, v0, v8
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v16, v16, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v16, v16, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v16, v16, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vand.vi v16, v8, 2
+; RV64-NEXT:    vand.vi v24, v8, 1
+; RV64-NEXT:    vand.vi v0, v8, 4
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vand.vi v16, v8, 8
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 16
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 32
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 64
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 128
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 256
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 512
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    li s5, 1024
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 1
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 2
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 4
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 8
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 7
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 16
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 32
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 64
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 128
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 256
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 512
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 1024
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 2048
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 8192
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 16384
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 32768
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 65536
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 131072
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    lui s5, 262144
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 8
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    mv s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    mv a1, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add a1, a1, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, a1
+; RV64-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s6
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s7
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 5
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s8
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s9
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s10
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s11
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, ra
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s3
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s3, vlenb
+; RV64-NEXT:    slli s3, s3, 3
+; RV64-NEXT:    mv s5, s3
+; RV64-NEXT:    slli s3, s3, 3
+; RV64-NEXT:    add s3, s3, s5
+; RV64-NEXT:    add s3, sp, s3
+; RV64-NEXT:    addi s3, s3, 128
+; RV64-NEXT:    vs8r.v v16, (s3) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s2
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s2, vlenb
+; RV64-NEXT:    slli s2, s2, 4
+; RV64-NEXT:    mv s3, s2
+; RV64-NEXT:    slli s2, s2, 1
+; RV64-NEXT:    add s2, s2, s3
+; RV64-NEXT:    add s2, sp, s2
+; RV64-NEXT:    addi s2, s2, 128
+; RV64-NEXT:    vs8r.v v16, (s2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s1, vlenb
+; RV64-NEXT:    slli s1, s1, 7
+; RV64-NEXT:    add s1, sp, s1
+; RV64-NEXT:    addi s1, s1, 128
+; RV64-NEXT:    vs8r.v v16, (s1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, s0
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr s0, vlenb
+; RV64-NEXT:    slli s0, s0, 4
+; RV64-NEXT:    mv s1, s0
+; RV64-NEXT:    slli s0, s0, 1
+; RV64-NEXT:    add s1, s1, s0
+; RV64-NEXT:    slli s0, s0, 1
+; RV64-NEXT:    add s0, s0, s1
+; RV64-NEXT:    add s0, sp, s0
+; RV64-NEXT:    addi s0, s0, 128
+; RV64-NEXT:    vs8r.v v16, (s0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t6
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 5
+; RV64-NEXT:    mv s0, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s0
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 128
+; RV64-NEXT:    vs8r.v v16, (t6) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t5
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t5, vlenb
+; RV64-NEXT:    slli t5, t5, 6
+; RV64-NEXT:    add t5, sp, t5
+; RV64-NEXT:    addi t5, t5, 128
+; RV64-NEXT:    vs8r.v v16, (t5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t4
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t4, vlenb
+; RV64-NEXT:    slli t4, t4, 3
+; RV64-NEXT:    mv t5, t4
+; RV64-NEXT:    slli t4, t4, 2
+; RV64-NEXT:    add t4, t4, t5
+; RV64-NEXT:    add t4, sp, t4
+; RV64-NEXT:    addi t4, t4, 128
+; RV64-NEXT:    vs8r.v v16, (t4) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t3
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t3, vlenb
+; RV64-NEXT:    slli t3, t3, 3
+; RV64-NEXT:    mv t4, t3
+; RV64-NEXT:    slli t3, t3, 1
+; RV64-NEXT:    add t3, t3, t4
+; RV64-NEXT:    add t3, sp, t3
+; RV64-NEXT:    addi t3, t3, 128
+; RV64-NEXT:    vs8r.v v16, (t3) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t2
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t2, vlenb
+; RV64-NEXT:    slli t2, t2, 3
+; RV64-NEXT:    add t2, sp, t2
+; RV64-NEXT:    addi t2, t2, 128
+; RV64-NEXT:    vs8r.v v16, (t2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t1, vlenb
+; RV64-NEXT:    slli t1, t1, 3
+; RV64-NEXT:    mv t2, t1
+; RV64-NEXT:    slli t1, t1, 1
+; RV64-NEXT:    add t2, t2, t1
+; RV64-NEXT:    slli t1, t1, 2
+; RV64-NEXT:    add t1, t1, t2
+; RV64-NEXT:    add t1, sp, t1
+; RV64-NEXT:    addi t1, t1, 128
+; RV64-NEXT:    vs8r.v v16, (t1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t0
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr t0, vlenb
+; RV64-NEXT:    slli t0, t0, 4
+; RV64-NEXT:    mv t1, t0
+; RV64-NEXT:    slli t0, t0, 2
+; RV64-NEXT:    add t0, t0, t1
+; RV64-NEXT:    add t0, sp, t0
+; RV64-NEXT:    addi t0, t0, 128
+; RV64-NEXT:    vs8r.v v16, (t0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a3
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    mv t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add t0, t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, t0
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a2
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    addi a0, sp, 128
+; RV64-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v24, v8
+; RV64-NEXT:    vxor.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 7
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsll.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v16, v16, s4
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    vsll.vx v16, v16, a1
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v24, v8
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 8
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 6
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v16, v8, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v16, v8
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v8, v24
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 8
+; RV64-NEXT:    vand.vx v0, v0, a4
+; RV64-NEXT:    vsrl.vi v8, v8, 24
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vor.vv v8, v0, v8
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    mv t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add t0, t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add t0, t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, t0
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 7
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v24, v8
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    mv t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add t0, t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, t0
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    mv t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, t0
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 6
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    mv t0, a3
+; RV64-NEXT:    slli a3, a3, 2
+; RV64-NEXT:    add a3, a3, t0
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v8, v8, v24
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    mv t0, a3
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    add a3, a3, t0
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v8, v24
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 128
+; RV64-NEXT:    vl8r.v v0, (a3) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vand.vx v16, v16, a2
+; RV64-NEXT:    vsll.vi v16, v16, 24
+; RV64-NEXT:    vand.vx v0, v8, a4
+; RV64-NEXT:    vsll.vi v0, v0, 8
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vor.vv v16, v0, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a3, a3, a2
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    addi a2, sp, 128
+; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vxor.vv v24, v24, v0
+; RV64-NEXT:    vsrl.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vsrl.vx v24, v24, a0
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vor.vv v8, v24, v8
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v16, v16, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v16, v16, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v16, v16, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 232(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 224(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 240
+; RV64-NEXT:    ret
+  %a = call <vscale x 8 x i64> @llvm.clmulr.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
+  ret <vscale x 8 x i64> %a
+}
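
For readers skimming these autogenerated checks, the expansion they exercise is the plain shift-and-XOR definition of a carry-less multiply: for each bit position of the second operand, the generated code masks that bit (vand), multiplies the first operand by the resulting power-of-two-or-zero (vmul), and folds the partial product into the accumulator with vxor, keeping only the low bits. A minimal scalar sketch of the same recurrence, in C++ (illustrative only, not part of the patch; the helper name clmul32 is made up here):

    #include <cstdint>

    // Reference semantics for a 32-bit carry-less multiply: XOR-accumulate
    // shifted copies of 'a' for each set bit of 'b', keeping the low 32 bits.
    static uint32_t clmul32(uint32_t a, uint32_t b) {
      uint32_t acc = 0;
      for (unsigned i = 0; i < 32; ++i)
        if ((b >> i) & 1)   // bit i of b selects a partial product ...
          acc ^= a << i;    // ... which is a shifted copy of a, folded in by XOR
      return acc;
    }

For example, clmul32(3, 5) XORs (3 << 0) with (3 << 2) and yields 15. The fixed-vector tests below apply the same per-bit mask/multiply/XOR sequence lane-wise, which is why each test body is a long chain of vand, vmul, and vxor instructions.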
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
new file mode 100644
index 0000000000000..1c00086064133
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
@@ -0,0 +1,19366 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <1 x i32> @clmul_v1i32(<1 x i32> %x, <1 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_v1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vand.vi v10, v9, 2
+; CHECK-NEXT:    vand.vi v11, v9, 1
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v11, v10
+; CHECK-NEXT:    vand.vi v11, v9, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vi v11, v9, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    vxor.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %a = call <1 x i32> @llvm.clmul.v1i32(<1 x i32> %x, <1 x i32> %y)
+  ret <1 x i32> %a
+}
+
+define <2 x i32> @clmul_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vand.vi v10, v9, 2
+; CHECK-NEXT:    vand.vi v11, v9, 1
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v11, v10
+; CHECK-NEXT:    vand.vi v11, v9, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vi v11, v9, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    vxor.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %a = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> %x, <2 x i32> %y)
+  ret <2 x i32> %a
+}
+
+define <4 x i32> @clmul_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vand.vi v10, v9, 2
+; CHECK-NEXT:    vand.vi v11, v9, 1
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v11, v10
+; CHECK-NEXT:    vand.vi v11, v9, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vi v11, v9, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vand.vx v11, v9, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vxor.vv v10, v10, v11
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    vxor.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %a = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i32> %a
+}
+
+define <8 x i32> @clmul_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vand.vi v10, v8, 2
+; CHECK-NEXT:    vand.vi v12, v8, 1
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v12, v10
+; CHECK-NEXT:    vand.vi v12, v8, 4
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vi v12, v8, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vxor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vmul.vv v8, v8, v12
+; CHECK-NEXT:    vxor.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %a = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %x, <8 x i32> %x)
+  ret <8 x i32> %a
+}
+
+define <16 x i32> @clmul_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vand.vi v16, v12, 2
+; CHECK-NEXT:    vand.vi v20, v12, 1
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v20, v16
+; CHECK-NEXT:    vand.vi v20, v12, 4
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vi v20, v12, 8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 256
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 1024
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 11
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 32
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 64
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 128
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 256
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 512
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 1024
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 2048
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 8192
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 16384
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 32768
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 65536
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 262144
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vand.vx v20, v12, a0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vxor.vv v16, v16, v20
+; CHECK-NEXT:    vmul.vv v8, v8, v12
+; CHECK-NEXT:    vxor.vv v8, v16, v8
+; CHECK-NEXT:    ret
+  %a = call <16 x i32> @llvm.clmul.v16i32(<16 x i32> %x, <16 x i32> %y)
+  ret <16 x i32> %a
+}
+
+define <1 x i64> @clmul_v1i64(<1 x i64> %x, <1 x i64> %y) nounwind {
+; RV32-LABEL: clmul_v1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li t5, 1
+; RV32-NEXT:    li a4, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li s11, 8
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    li ra, 32
+; RV32-NEXT:    li s10, 64
+; RV32-NEXT:    li s9, 128
+; RV32-NEXT:    li s8, 256
+; RV32-NEXT:    li s7, 512
+; RV32-NEXT:    li s1, 1024
+; RV32-NEXT:    lui s6, 1
+; RV32-NEXT:    lui s5, 2
+; RV32-NEXT:    lui s4, 4
+; RV32-NEXT:    lui s3, 8
+; RV32-NEXT:    lui s2, 16
+; RV32-NEXT:    lui s0, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a3, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw t5, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a4, 260(sp)
+; RV32-NEXT:    lui a4, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s11, 244(sp)
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vand.vi v13, v9, 2
+; RV32-NEXT:    vand.vi v14, v9, 1
+; RV32-NEXT:    vand.vi v12, v9, 4
+; RV32-NEXT:    vand.vi v11, v9, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a0, 236(sp)
+; RV32-NEXT:    vand.vx v10, v9, a0
+; RV32-NEXT:    addi s11, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw ra, 228(sp)
+; RV32-NEXT:    vand.vx v15, v9, ra
+; RV32-NEXT:    addi ra, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s10, 220(sp)
+; RV32-NEXT:    vand.vx v16, v9, s10
+; RV32-NEXT:    addi s10, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s9, 212(sp)
+; RV32-NEXT:    vand.vx v17, v9, s9
+; RV32-NEXT:    addi s9, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s8, 204(sp)
+; RV32-NEXT:    vand.vx v18, v9, s8
+; RV32-NEXT:    addi s8, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s7, 196(sp)
+; RV32-NEXT:    vand.vx v19, v9, s7
+; RV32-NEXT:    addi s7, sp, 232
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw s1, 188(sp)
+; RV32-NEXT:    vand.vx v20, v9, s1
+; RV32-NEXT:    slli t5, t5, 11
+; RV32-NEXT:    vand.vx v21, v9, s6
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw t5, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw s6, 172(sp)
+; RV32-NEXT:    addi s6, sp, 216
+; RV32-NEXT:    vand.vx v22, v9, s5
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s5, 164(sp)
+; RV32-NEXT:    addi s5, sp, 208
+; RV32-NEXT:    vand.vx v23, v9, s4
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s4, 156(sp)
+; RV32-NEXT:    addi s4, sp, 200
+; RV32-NEXT:    vand.vx v24, v9, s3
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s3, 148(sp)
+; RV32-NEXT:    addi s3, sp, 192
+; RV32-NEXT:    vand.vx v25, v9, s2
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s2, 140(sp)
+; RV32-NEXT:    addi s2, sp, 184
+; RV32-NEXT:    vand.vx v26, v9, s0
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s0, 132(sp)
+; RV32-NEXT:    addi s1, sp, 176
+; RV32-NEXT:    vand.vx v27, v9, t6
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t6, 124(sp)
+; RV32-NEXT:    addi s0, sp, 168
+; RV32-NEXT:    vand.vx v28, v9, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t6, sp, 160
+; RV32-NEXT:    vand.vx v29, v9, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t4, sp, 152
+; RV32-NEXT:    vand.vx v30, v9, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t3, sp, 144
+; RV32-NEXT:    vand.vx v31, v9, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t2, sp, 136
+; RV32-NEXT:    vand.vx v7, v9, t0
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t1, sp, 128
+; RV32-NEXT:    vand.vx v6, v9, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi t0, sp, 120
+; RV32-NEXT:    vand.vx v5, v9, a6
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    vand.vx v4, v9, a5
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a6, sp, 104
+; RV32-NEXT:    vand.vx v3, v9, a3
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a3, 52(sp)
+; RV32-NEXT:    addi a5, sp, 96
+; RV32-NEXT:    vand.vx v2, v9, a4
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a4, 44(sp)
+; RV32-NEXT:    addi a4, sp, 88
+; RV32-NEXT:    vand.vx v1, v9, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a0, 262144
+; RV32-NEXT:    sw a0, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    addi a2, sp, 72
+; RV32-NEXT:    vand.vx v0, v9, t5
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    vmul.vv v13, v8, v13
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vi v14, v14, 0
+; RV32-NEXT:    vxor.vv v14, v14, v13
+; RV32-NEXT:    vlse64.v v13, (s11), zero
+; RV32-NEXT:    addi s11, sp, 56
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v14, v14, v12
+; RV32-NEXT:    vlse64.v v12, (ra), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv ra, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, ra
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v12, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi ra, sp, 48
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v14, v14, v11
+; RV32-NEXT:    vlse64.v v11, (s10), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli s10, t5, 2
+; RV32-NEXT:    add t5, s10, t5
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v11, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s10, sp, 40
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v14, v14, v10
+; RV32-NEXT:    vlse64.v v10, (s9), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v10, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi t5, sp, 32
+; RV32-NEXT:    vmul.vv v15, v8, v15
+; RV32-NEXT:    vxor.vv v15, v14, v15
+; RV32-NEXT:    vlse64.v v10, (s8), zero
+; RV32-NEXT:    csrr s8, vlenb
+; RV32-NEXT:    slli s9, s8, 1
+; RV32-NEXT:    add s8, s9, s8
+; RV32-NEXT:    add s8, sp, s8
+; RV32-NEXT:    addi s8, s8, 288
+; RV32-NEXT:    vs1r.v v10, (s8) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s8, sp, 24
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v15, v16
+; RV32-NEXT:    vlse64.v v10, (s7), zero
+; RV32-NEXT:    csrr s7, vlenb
+; RV32-NEXT:    slli s7, s7, 1
+; RV32-NEXT:    add s7, sp, s7
+; RV32-NEXT:    addi s7, s7, 288
+; RV32-NEXT:    vs1r.v v10, (s7) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 16
+; RV32-NEXT:    vmul.vv v17, v8, v17
+; RV32-NEXT:    vmul.vv v18, v8, v18
+; RV32-NEXT:    vmul.vv v19, v8, v19
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vmul.vv v21, v8, v21
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    vmul.vv v23, v8, v23
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v25, v8, v25
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v27, v8, v27
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v29, v8, v29
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vmul.vv v31, v8, v31
+; RV32-NEXT:    vmul.vv v7, v8, v7
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vmul.vv v5, v8, v5
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vmul.vv v3, v8, v3
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vmul.vv v1, v8, v1
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v16, v16, v17
+; RV32-NEXT:    addi s9, sp, 224
+; RV32-NEXT:    vlse64.v v11, (s9), zero
+; RV32-NEXT:    vxor.vv v16, v16, v18
+; RV32-NEXT:    vlse64.v v10, (s6), zero
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 288
+; RV32-NEXT:    vs1r.v v10, (s6) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vxor.vv v16, v16, v19
+; RV32-NEXT:    vlse64.v v10, (s5), zero
+; RV32-NEXT:    addi s5, sp, 288
+; RV32-NEXT:    vs1r.v v10, (s5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vxor.vv v16, v16, v20
+; RV32-NEXT:    vlse64.v v12, (s4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v0
+; RV32-NEXT:    vlse64.v v0, (s3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v21
+; RV32-NEXT:    vlse64.v v21, (s2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v22
+; RV32-NEXT:    vlse64.v v22, (s1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v23
+; RV32-NEXT:    vlse64.v v23, (s0), zero
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    vlse64.v v24, (t6), zero
+; RV32-NEXT:    vxor.vv v16, v16, v25
+; RV32-NEXT:    vlse64.v v25, (t4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v26
+; RV32-NEXT:    vlse64.v v26, (t3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v27
+; RV32-NEXT:    vlse64.v v27, (t2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v28
+; RV32-NEXT:    vlse64.v v28, (t1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v29
+; RV32-NEXT:    vlse64.v v29, (t0), zero
+; RV32-NEXT:    vxor.vv v16, v16, v30
+; RV32-NEXT:    vlse64.v v30, (a7), zero
+; RV32-NEXT:    vxor.vv v16, v16, v31
+; RV32-NEXT:    vlse64.v v31, (a6), zero
+; RV32-NEXT:    vxor.vv v16, v16, v7
+; RV32-NEXT:    vlse64.v v7, (a5), zero
+; RV32-NEXT:    vxor.vv v16, v16, v6
+; RV32-NEXT:    vlse64.v v6, (a4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v5
+; RV32-NEXT:    vlse64.v v5, (a3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v4
+; RV32-NEXT:    vlse64.v v4, (a2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v3
+; RV32-NEXT:    vlse64.v v3, (a1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v2
+; RV32-NEXT:    vlse64.v v2, (s11), zero
+; RV32-NEXT:    vxor.vv v1, v16, v1
+; RV32-NEXT:    vlse64.v v10, (ra), zero
+; RV32-NEXT:    vand.vv v13, v9, v13
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v14, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v14, v9, v14
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v15, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v15, v9, v15
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v16, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v9, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v17, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v17, v9, v17
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v18, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v18, v9, v18
+; RV32-NEXT:    vand.vv v19, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v20, v9, v11
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v11, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v11, v9, v12
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v0, v9, v0
+; RV32-NEXT:    vand.vv v21, v9, v21
+; RV32-NEXT:    vand.vv v22, v9, v22
+; RV32-NEXT:    vand.vv v23, v9, v23
+; RV32-NEXT:    vand.vv v24, v9, v24
+; RV32-NEXT:    vand.vv v25, v9, v25
+; RV32-NEXT:    vand.vv v26, v9, v26
+; RV32-NEXT:    vand.vv v27, v9, v27
+; RV32-NEXT:    vand.vv v28, v9, v28
+; RV32-NEXT:    vand.vv v29, v9, v29
+; RV32-NEXT:    vand.vv v30, v9, v30
+; RV32-NEXT:    vand.vv v31, v9, v31
+; RV32-NEXT:    vand.vv v7, v9, v7
+; RV32-NEXT:    vand.vv v6, v9, v6
+; RV32-NEXT:    vand.vv v5, v9, v5
+; RV32-NEXT:    vand.vv v4, v9, v4
+; RV32-NEXT:    vand.vv v11, v9, v3
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v2, v9, v2
+; RV32-NEXT:    vand.vv v10, v9, v10
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v10, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vlse64.v v10, (s10), zero
+; RV32-NEXT:    vlse64.v v3, (t5), zero
+; RV32-NEXT:    vlse64.v v11, (s8), zero
+; RV32-NEXT:    vlse64.v v12, (s7), zero
+; RV32-NEXT:    vand.vv v10, v9, v10
+; RV32-NEXT:    vand.vv v3, v9, v3
+; RV32-NEXT:    vand.vv v11, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v9, v12
+; RV32-NEXT:    vand.vx v9, v9, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    vxor.vv v9, v1, v9
+; RV32-NEXT:    vmul.vv v11, v8, v13
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v14
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v15
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v16
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v17
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v18
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v19
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v20
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v0
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v21
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v22
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v23
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v24
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v25
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v26
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v27
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v28
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v29
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v30
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v31
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v7
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v6
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v5
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v4
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v2
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    vmul.vv v10, v8, v3
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    vmul.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v9, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_v1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vand.vi v10, v9, 2
+; RV64-NEXT:    vand.vi v11, v9, 1
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v11, v10
+; RV64-NEXT:    vand.vi v11, v9, 4
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vi v11, v9, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vmul.vv v8, v8, v9
+; RV64-NEXT:    vxor.vv v8, v10, v8
+; RV64-NEXT:    ret
+  %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y)
+  ret <1 x i64> %a
+}
+
+define <2 x i64> @clmul_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
+; RV32-LABEL: clmul_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li t5, 1
+; RV32-NEXT:    li a4, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li s11, 8
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    li ra, 32
+; RV32-NEXT:    li s10, 64
+; RV32-NEXT:    li s9, 128
+; RV32-NEXT:    li s8, 256
+; RV32-NEXT:    li s7, 512
+; RV32-NEXT:    li s1, 1024
+; RV32-NEXT:    lui s6, 1
+; RV32-NEXT:    lui s5, 2
+; RV32-NEXT:    lui s4, 4
+; RV32-NEXT:    lui s3, 8
+; RV32-NEXT:    lui s2, 16
+; RV32-NEXT:    lui s0, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a3, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw t5, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a4, 260(sp)
+; RV32-NEXT:    lui a4, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s11, 244(sp)
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vand.vi v13, v9, 2
+; RV32-NEXT:    vand.vi v14, v9, 1
+; RV32-NEXT:    vand.vi v12, v9, 4
+; RV32-NEXT:    vand.vi v11, v9, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a0, 236(sp)
+; RV32-NEXT:    vand.vx v10, v9, a0
+; RV32-NEXT:    addi s11, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw ra, 228(sp)
+; RV32-NEXT:    vand.vx v15, v9, ra
+; RV32-NEXT:    addi ra, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s10, 220(sp)
+; RV32-NEXT:    vand.vx v16, v9, s10
+; RV32-NEXT:    addi s10, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s9, 212(sp)
+; RV32-NEXT:    vand.vx v17, v9, s9
+; RV32-NEXT:    addi s9, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s8, 204(sp)
+; RV32-NEXT:    vand.vx v18, v9, s8
+; RV32-NEXT:    addi s8, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s7, 196(sp)
+; RV32-NEXT:    vand.vx v19, v9, s7
+; RV32-NEXT:    addi s7, sp, 232
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw s1, 188(sp)
+; RV32-NEXT:    vand.vx v20, v9, s1
+; RV32-NEXT:    slli t5, t5, 11
+; RV32-NEXT:    vand.vx v21, v9, s6
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw t5, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw s6, 172(sp)
+; RV32-NEXT:    addi s6, sp, 216
+; RV32-NEXT:    vand.vx v22, v9, s5
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s5, 164(sp)
+; RV32-NEXT:    addi s5, sp, 208
+; RV32-NEXT:    vand.vx v23, v9, s4
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s4, 156(sp)
+; RV32-NEXT:    addi s4, sp, 200
+; RV32-NEXT:    vand.vx v24, v9, s3
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s3, 148(sp)
+; RV32-NEXT:    addi s3, sp, 192
+; RV32-NEXT:    vand.vx v25, v9, s2
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s2, 140(sp)
+; RV32-NEXT:    addi s2, sp, 184
+; RV32-NEXT:    vand.vx v26, v9, s0
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s0, 132(sp)
+; RV32-NEXT:    addi s1, sp, 176
+; RV32-NEXT:    vand.vx v27, v9, t6
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t6, 124(sp)
+; RV32-NEXT:    addi s0, sp, 168
+; RV32-NEXT:    vand.vx v28, v9, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t6, sp, 160
+; RV32-NEXT:    vand.vx v29, v9, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t4, sp, 152
+; RV32-NEXT:    vand.vx v30, v9, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t3, sp, 144
+; RV32-NEXT:    vand.vx v31, v9, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t2, sp, 136
+; RV32-NEXT:    vand.vx v7, v9, t0
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t1, sp, 128
+; RV32-NEXT:    vand.vx v6, v9, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi t0, sp, 120
+; RV32-NEXT:    vand.vx v5, v9, a6
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    vand.vx v4, v9, a5
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a6, sp, 104
+; RV32-NEXT:    vand.vx v3, v9, a3
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a3, 52(sp)
+; RV32-NEXT:    addi a5, sp, 96
+; RV32-NEXT:    vand.vx v2, v9, a4
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a4, 44(sp)
+; RV32-NEXT:    addi a4, sp, 88
+; RV32-NEXT:    vand.vx v1, v9, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a0, 262144
+; RV32-NEXT:    sw a0, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    addi a2, sp, 72
+; RV32-NEXT:    vand.vx v0, v9, t5
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    vmul.vv v13, v8, v13
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vi v14, v14, 0
+; RV32-NEXT:    vxor.vv v14, v14, v13
+; RV32-NEXT:    vlse64.v v13, (s11), zero
+; RV32-NEXT:    addi s11, sp, 56
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v14, v14, v12
+; RV32-NEXT:    vlse64.v v12, (ra), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv ra, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, ra
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v12, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi ra, sp, 48
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v14, v14, v11
+; RV32-NEXT:    vlse64.v v11, (s10), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli s10, t5, 2
+; RV32-NEXT:    add t5, s10, t5
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v11, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s10, sp, 40
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v14, v14, v10
+; RV32-NEXT:    vlse64.v v10, (s9), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs1r.v v10, (t5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi t5, sp, 32
+; RV32-NEXT:    vmul.vv v15, v8, v15
+; RV32-NEXT:    vxor.vv v15, v14, v15
+; RV32-NEXT:    vlse64.v v10, (s8), zero
+; RV32-NEXT:    csrr s8, vlenb
+; RV32-NEXT:    slli s9, s8, 1
+; RV32-NEXT:    add s8, s9, s8
+; RV32-NEXT:    add s8, sp, s8
+; RV32-NEXT:    addi s8, s8, 288
+; RV32-NEXT:    vs1r.v v10, (s8) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s8, sp, 24
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v15, v16
+; RV32-NEXT:    vlse64.v v10, (s7), zero
+; RV32-NEXT:    csrr s7, vlenb
+; RV32-NEXT:    slli s7, s7, 1
+; RV32-NEXT:    add s7, sp, s7
+; RV32-NEXT:    addi s7, s7, 288
+; RV32-NEXT:    vs1r.v v10, (s7) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 16
+; RV32-NEXT:    vmul.vv v17, v8, v17
+; RV32-NEXT:    vmul.vv v18, v8, v18
+; RV32-NEXT:    vmul.vv v19, v8, v19
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vmul.vv v21, v8, v21
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    vmul.vv v23, v8, v23
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v25, v8, v25
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v27, v8, v27
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v29, v8, v29
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vmul.vv v31, v8, v31
+; RV32-NEXT:    vmul.vv v7, v8, v7
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vmul.vv v5, v8, v5
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vmul.vv v3, v8, v3
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vmul.vv v1, v8, v1
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v16, v16, v17
+; RV32-NEXT:    addi s9, sp, 224
+; RV32-NEXT:    vlse64.v v11, (s9), zero
+; RV32-NEXT:    vxor.vv v16, v16, v18
+; RV32-NEXT:    vlse64.v v10, (s6), zero
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 288
+; RV32-NEXT:    vs1r.v v10, (s6) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vxor.vv v16, v16, v19
+; RV32-NEXT:    vlse64.v v10, (s5), zero
+; RV32-NEXT:    addi s5, sp, 288
+; RV32-NEXT:    vs1r.v v10, (s5) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vxor.vv v16, v16, v20
+; RV32-NEXT:    vlse64.v v12, (s4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v0
+; RV32-NEXT:    vlse64.v v0, (s3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v21
+; RV32-NEXT:    vlse64.v v21, (s2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v22
+; RV32-NEXT:    vlse64.v v22, (s1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v23
+; RV32-NEXT:    vlse64.v v23, (s0), zero
+; RV32-NEXT:    vxor.vv v16, v16, v24
+; RV32-NEXT:    vlse64.v v24, (t6), zero
+; RV32-NEXT:    vxor.vv v16, v16, v25
+; RV32-NEXT:    vlse64.v v25, (t4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v26
+; RV32-NEXT:    vlse64.v v26, (t3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v27
+; RV32-NEXT:    vlse64.v v27, (t2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v28
+; RV32-NEXT:    vlse64.v v28, (t1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v29
+; RV32-NEXT:    vlse64.v v29, (t0), zero
+; RV32-NEXT:    vxor.vv v16, v16, v30
+; RV32-NEXT:    vlse64.v v30, (a7), zero
+; RV32-NEXT:    vxor.vv v16, v16, v31
+; RV32-NEXT:    vlse64.v v31, (a6), zero
+; RV32-NEXT:    vxor.vv v16, v16, v7
+; RV32-NEXT:    vlse64.v v7, (a5), zero
+; RV32-NEXT:    vxor.vv v16, v16, v6
+; RV32-NEXT:    vlse64.v v6, (a4), zero
+; RV32-NEXT:    vxor.vv v16, v16, v5
+; RV32-NEXT:    vlse64.v v5, (a3), zero
+; RV32-NEXT:    vxor.vv v16, v16, v4
+; RV32-NEXT:    vlse64.v v4, (a2), zero
+; RV32-NEXT:    vxor.vv v16, v16, v3
+; RV32-NEXT:    vlse64.v v3, (a1), zero
+; RV32-NEXT:    vxor.vv v16, v16, v2
+; RV32-NEXT:    vlse64.v v2, (s11), zero
+; RV32-NEXT:    vxor.vv v1, v16, v1
+; RV32-NEXT:    vlse64.v v10, (ra), zero
+; RV32-NEXT:    vand.vv v13, v9, v13
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v14, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v14, v9, v14
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v15, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v15, v9, v15
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v16, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v9, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v17, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v17, v9, v17
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v18, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v18, v9, v18
+; RV32-NEXT:    vand.vv v19, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v20, v9, v11
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v11, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v11, v9, v12
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v0, v9, v0
+; RV32-NEXT:    vand.vv v21, v9, v21
+; RV32-NEXT:    vand.vv v22, v9, v22
+; RV32-NEXT:    vand.vv v23, v9, v23
+; RV32-NEXT:    vand.vv v24, v9, v24
+; RV32-NEXT:    vand.vv v25, v9, v25
+; RV32-NEXT:    vand.vv v26, v9, v26
+; RV32-NEXT:    vand.vv v27, v9, v27
+; RV32-NEXT:    vand.vv v28, v9, v28
+; RV32-NEXT:    vand.vv v29, v9, v29
+; RV32-NEXT:    vand.vv v30, v9, v30
+; RV32-NEXT:    vand.vv v31, v9, v31
+; RV32-NEXT:    vand.vv v7, v9, v7
+; RV32-NEXT:    vand.vv v6, v9, v6
+; RV32-NEXT:    vand.vv v5, v9, v5
+; RV32-NEXT:    vand.vv v4, v9, v4
+; RV32-NEXT:    vand.vv v11, v9, v3
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v2, v9, v2
+; RV32-NEXT:    vand.vv v10, v9, v10
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v10, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vlse64.v v10, (s10), zero
+; RV32-NEXT:    vlse64.v v3, (t5), zero
+; RV32-NEXT:    vlse64.v v11, (s8), zero
+; RV32-NEXT:    vlse64.v v12, (s7), zero
+; RV32-NEXT:    vand.vv v10, v9, v10
+; RV32-NEXT:    vand.vv v3, v9, v3
+; RV32-NEXT:    vand.vv v11, v9, v11
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v9, v12
+; RV32-NEXT:    vand.vx v9, v9, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    vxor.vv v9, v1, v9
+; RV32-NEXT:    vmul.vv v11, v8, v13
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v14
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v15
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v16
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v17
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v18
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v19
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v20
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v0
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v21
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v22
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v23
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v24
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v25
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v26
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v27
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v28
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v29
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v30
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v31
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v7
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v6
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v5
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v4
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v11, v8, v2
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v11, v8, v11
+; RV32-NEXT:    vxor.vv v9, v9, v11
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    vmul.vv v10, v8, v3
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v9, v9, v10
+; RV32-NEXT:    vmul.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v9, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vand.vi v10, v9, 2
+; RV64-NEXT:    vand.vi v11, v9, 1
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v11, v10
+; RV64-NEXT:    vand.vi v11, v9, 4
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vi v11, v9, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vand.vx v11, v9, a0
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v11, v8, v11
+; RV64-NEXT:    vxor.vv v10, v10, v11
+; RV64-NEXT:    vmul.vv v8, v8, v9
+; RV64-NEXT:    vxor.vv v8, v10, v8
+; RV64-NEXT:    ret
+  %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
+  ret <2 x i64> %a
+}
+
+define <4 x i64> @clmul_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
+; RV32-LABEL: clmul_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li s2, 1
+; RV32-NEXT:    li a3, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li s7, 8
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    li s6, 32
+; RV32-NEXT:    li s5, 64
+; RV32-NEXT:    li s4, 128
+; RV32-NEXT:    li s1, 256
+; RV32-NEXT:    li s0, 512
+; RV32-NEXT:    li t5, 1024
+; RV32-NEXT:    lui ra, 1
+; RV32-NEXT:    lui s8, 2
+; RV32-NEXT:    lui s10, 4
+; RV32-NEXT:    lui s11, 8
+; RV32-NEXT:    lui s9, 16
+; RV32-NEXT:    lui s3, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a4, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw s2, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a3, 260(sp)
+; RV32-NEXT:    lui a3, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s7, 244(sp)
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vand.vi v28, v10, 2
+; RV32-NEXT:    vand.vi v20, v10, 1
+; RV32-NEXT:    vand.vi v30, v10, 4
+; RV32-NEXT:    vand.vi v14, v10, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a0, 236(sp)
+; RV32-NEXT:    vand.vx v12, v10, a0
+; RV32-NEXT:    addi s7, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw s6, 228(sp)
+; RV32-NEXT:    vand.vx v16, v10, s6
+; RV32-NEXT:    addi s6, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s5, 220(sp)
+; RV32-NEXT:    vand.vx v18, v10, s5
+; RV32-NEXT:    addi s5, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s4, 212(sp)
+; RV32-NEXT:    vand.vx v0, v10, s4
+; RV32-NEXT:    addi s4, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s1, 204(sp)
+; RV32-NEXT:    vand.vx v6, v10, s1
+; RV32-NEXT:    addi s1, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s0, 196(sp)
+; RV32-NEXT:    vand.vx v4, v10, s0
+; RV32-NEXT:    addi s0, sp, 232
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw t5, 188(sp)
+; RV32-NEXT:    vand.vx v2, v10, t5
+; RV32-NEXT:    slli s2, s2, 11
+; RV32-NEXT:    vand.vx v24, v10, ra
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw s2, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw ra, 172(sp)
+; RV32-NEXT:    addi t5, sp, 216
+; RV32-NEXT:    vand.vx v26, v10, s8
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s8, 164(sp)
+; RV32-NEXT:    addi s8, sp, 208
+; RV32-NEXT:    vand.vx v22, v10, s10
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s10, 156(sp)
+; RV32-NEXT:    addi s10, sp, 200
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vi v20, v20, 0
+; RV32-NEXT:    vxor.vv v20, v20, v28
+; RV32-NEXT:    vand.vx v28, v10, s11
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s11, 148(sp)
+; RV32-NEXT:    addi s11, sp, 192
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vxor.vv v20, v20, v30
+; RV32-NEXT:    vand.vx v30, v10, s9
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s9, 140(sp)
+; RV32-NEXT:    addi s9, sp, 184
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vv v14, v20, v14
+; RV32-NEXT:    vand.vx v20, v10, s3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv ra, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, ra
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v20, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s3, 132(sp)
+; RV32-NEXT:    addi s3, sp, 176
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v12, v14, v12
+; RV32-NEXT:    vand.vx v14, v10, t6
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t6, 124(sp)
+; RV32-NEXT:    addi t6, sp, 168
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vand.vx v16, v10, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t4, sp, 160
+; RV32-NEXT:    vmul.vv v18, v8, v18
+; RV32-NEXT:    vxor.vv v18, v12, v18
+; RV32-NEXT:    vand.vx v12, v10, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t3, sp, 152
+; RV32-NEXT:    vmul.vv v20, v8, v0
+; RV32-NEXT:    vxor.vv v18, v18, v20
+; RV32-NEXT:    vand.vx v20, v10, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t2, sp, 144
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vxor.vv v18, v18, v6
+; RV32-NEXT:    vand.vx v6, v10, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t1, sp, 136
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v18, v18, v4
+; RV32-NEXT:    vand.vx v4, v10, t0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv ra, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add ra, ra, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, ra
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t0, sp, 128
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vxor.vv v18, v18, v2
+; RV32-NEXT:    vand.vx v2, v10, s2
+; RV32-NEXT:    addi ra, sp, 120
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vxor.vv v18, v18, v2
+; RV32-NEXT:    vand.vx v2, v10, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi a7, sp, 112
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v18, v18, v24
+; RV32-NEXT:    vand.vx v4, v10, a6
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a6, sp, 104
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vxor.vv v18, v18, v26
+; RV32-NEXT:    vand.vx v26, v10, a5
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a5, sp, 96
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    vxor.vv v18, v18, v22
+; RV32-NEXT:    vand.vx v24, v10, a4
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a4, 52(sp)
+; RV32-NEXT:    addi a4, sp, 88
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v18, v18, v28
+; RV32-NEXT:    vand.vx v28, v10, a3
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a3, 44(sp)
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vxor.vv v18, v18, v30
+; RV32-NEXT:    vand.vx v30, v10, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a2, sp, 72
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a0, 262144
+; RV32-NEXT:    sw a0, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    sw a6, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv s2, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, s2
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vl2r.v v22, (a6) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v22
+; RV32-NEXT:    vxor.vv v0, v18, v0
+; RV32-NEXT:    vlse64.v v18, (s7), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv s2, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, s2
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v18, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s7, sp, 56
+; RV32-NEXT:    vmul.vv v14, v8, v14
+; RV32-NEXT:    vxor.vv v14, v0, v14
+; RV32-NEXT:    vlse64.v v18, (s6), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv s2, a6
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    add a6, a6, s2
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v18, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s2, sp, 48
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v14, v14, v16
+; RV32-NEXT:    vlse64.v v16, (s5), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    mv s5, a6
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    add a6, a6, s5
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v16, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s5, sp, 40
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v12, v14, v12
+; RV32-NEXT:    vlse64.v v14, (s4), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 5
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v14, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s4, sp, 32
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v12, v20
+; RV32-NEXT:    vlse64.v v12, (s1), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    mv s1, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add s1, s1, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add s1, s1, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, s1
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v12, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s1, sp, 24
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    vxor.vv v20, v20, v6
+; RV32-NEXT:    vlse64.v v12, (s0), zero
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv s0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add s0, s0, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, s0
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 288
+; RV32-NEXT:    vs2r.v v12, (a6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi s0, sp, 16
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    mv a6, s6
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    add a6, a6, s6
+; RV32-NEXT:    slli s6, s6, 3
+; RV32-NEXT:    add s6, s6, a6
+; RV32-NEXT:    lw a6, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 288
+; RV32-NEXT:    vl2r.v v12, (s6) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v12
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    vxor.vv v20, v20, v6
+; RV32-NEXT:    addi s6, sp, 224
+; RV32-NEXT:    vlse64.v v0, (s6), zero
+; RV32-NEXT:    vxor.vv v20, v20, v2
+; RV32-NEXT:    vlse64.v v6, (t5), zero
+; RV32-NEXT:    vxor.vv v20, v20, v4
+; RV32-NEXT:    vlse64.v v22, (s8), zero
+; RV32-NEXT:    vxor.vv v20, v20, v26
+; RV32-NEXT:    vlse64.v v18, (s10), zero
+; RV32-NEXT:    vxor.vv v20, v20, v24
+; RV32-NEXT:    vlse64.v v16, (s11), zero
+; RV32-NEXT:    vxor.vv v20, v20, v28
+; RV32-NEXT:    vlse64.v v14, (s9), zero
+; RV32-NEXT:    vxor.vv v2, v20, v30
+; RV32-NEXT:    vlse64.v v12, (s3), zero
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v26, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v4, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 4
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v30, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 5
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v20, v10, v20
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v24, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v28, v10, v24
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vl2r.v v24, (t5) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v24, v10, v24
+; RV32-NEXT:    vand.vv v0, v10, v0
+; RV32-NEXT:    vand.vv v6, v10, v6
+; RV32-NEXT:    vand.vv v22, v10, v22
+; RV32-NEXT:    vand.vv v18, v10, v18
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v10, v16
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v16, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v14, v10, v14
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add s3, s3, t5
+; RV32-NEXT:    slli t5, t5, 1
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v14, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr t5, vlenb
+; RV32-NEXT:    slli t5, t5, 3
+; RV32-NEXT:    mv s3, t5
+; RV32-NEXT:    slli t5, t5, 2
+; RV32-NEXT:    add t5, t5, s3
+; RV32-NEXT:    add t5, sp, t5
+; RV32-NEXT:    addi t5, t5, 288
+; RV32-NEXT:    vs2r.v v12, (t5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (t6), zero
+; RV32-NEXT:    vlse64.v v14, (t4), zero
+; RV32-NEXT:    vlse64.v v16, (t3), zero
+; RV32-NEXT:    vlse64.v v18, (t2), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 2
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t3, t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr t2, vlenb
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    mv t3, t2
+; RV32-NEXT:    slli t2, t2, 1
+; RV32-NEXT:    add t3, t3, t2
+; RV32-NEXT:    slli t2, t2, 3
+; RV32-NEXT:    add t2, t2, t3
+; RV32-NEXT:    add t2, sp, t2
+; RV32-NEXT:    addi t2, t2, 288
+; RV32-NEXT:    vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (t1), zero
+; RV32-NEXT:    vlse64.v v14, (t0), zero
+; RV32-NEXT:    vlse64.v v16, (ra), zero
+; RV32-NEXT:    vlse64.v v18, (a7), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 4
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    add t0, t0, a7
+; RV32-NEXT:    slli a7, a7, 1
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr a7, vlenb
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    mv t0, a7
+; RV32-NEXT:    slli a7, a7, 3
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, sp, a7
+; RV32-NEXT:    addi a7, a7, 288
+; RV32-NEXT:    vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (a6), zero
+; RV32-NEXT:    vlse64.v v14, (a5), zero
+; RV32-NEXT:    vlse64.v v16, (a4), zero
+; RV32-NEXT:    vlse64.v v18, (a3), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a4, a4, a3
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 1
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 288
+; RV32-NEXT:    vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v12, (a2), zero
+; RV32-NEXT:    vlse64.v v14, (a1), zero
+; RV32-NEXT:    vlse64.v v16, (s7), zero
+; RV32-NEXT:    vlse64.v v18, (s2), zero
+; RV32-NEXT:    vand.vv v12, v10, v12
+; RV32-NEXT:    addi a1, sp, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v14
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    add a2, a2, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v10, v18
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vlse64.v v14, (s5), zero
+; RV32-NEXT:    vlse64.v v16, (s4), zero
+; RV32-NEXT:    vlse64.v v18, (s1), zero
+; RV32-NEXT:    vlse64.v v12, (s0), zero
+; RV32-NEXT:    vand.vv v14, v10, v14
+; RV32-NEXT:    vand.vv v16, v10, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 288
+; RV32-NEXT:    vs2r.v v16, (a1) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v18, v10, v18
+; RV32-NEXT:    vand.vv v16, v10, v12
+; RV32-NEXT:    vand.vx v10, v10, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vxor.vv v10, v2, v10
+; RV32-NEXT:    vmul.vv v12, v8, v26
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v4
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v30
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v20
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v28
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v24
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v0
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v6
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v22
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v14
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v12, v8, v18
+; RV32-NEXT:    vxor.vv v10, v10, v12
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v10, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vand.vi v12, v10, 2
+; RV64-NEXT:    vand.vi v14, v10, 1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v14, v12
+; RV64-NEXT:    vand.vi v14, v10, 4
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vi v14, v10, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vand.vx v14, v10, a0
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v14, v8, v14
+; RV64-NEXT:    vxor.vv v12, v12, v14
+; RV64-NEXT:    vmul.vv v8, v8, v10
+; RV64-NEXT:    vxor.vv v8, v12, v8
+; RV64-NEXT:    ret
+  %a = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %x, <4 x i64> %y)
+  ret <4 x i64> %a
+}
+
+define <8 x i64> @clmul_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
+; RV32-LABEL: clmul_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    li s4, 1
+; RV32-NEXT:    li a3, 2
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li a0, 8
+; RV32-NEXT:    li s3, 16
+; RV32-NEXT:    li s2, 32
+; RV32-NEXT:    li s5, 64
+; RV32-NEXT:    li s6, 128
+; RV32-NEXT:    li s8, 256
+; RV32-NEXT:    li s1, 512
+; RV32-NEXT:    li s7, 1024
+; RV32-NEXT:    lui ra, 1
+; RV32-NEXT:    lui s11, 2
+; RV32-NEXT:    lui s10, 4
+; RV32-NEXT:    lui s9, 8
+; RV32-NEXT:    lui s0, 16
+; RV32-NEXT:    lui t6, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t4, 128
+; RV32-NEXT:    lui t3, 256
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    lui t1, 1024
+; RV32-NEXT:    lui t0, 2048
+; RV32-NEXT:    lui a7, 4096
+; RV32-NEXT:    lui a6, 8192
+; RV32-NEXT:    lui a5, 16384
+; RV32-NEXT:    lui a4, 32768
+; RV32-NEXT:    sw a1, 272(sp)
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw zero, 264(sp)
+; RV32-NEXT:    sw s4, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a3, 260(sp)
+; RV32-NEXT:    lui a3, 65536
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw a2, 252(sp)
+; RV32-NEXT:    lui a2, 131072
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw a0, 244(sp)
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vand.vi v28, v12, 2
+; RV32-NEXT:    vand.vi v4, v12, 1
+; RV32-NEXT:    vand.vi v24, v12, 4
+; RV32-NEXT:    vand.vi v20, v12, 8
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s3, 236(sp)
+; RV32-NEXT:    vand.vx v16, v12, s3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi s3, sp, 272
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw s2, 228(sp)
+; RV32-NEXT:    vand.vx v0, v12, s2
+; RV32-NEXT:    addi s2, sp, 264
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw s5, 220(sp)
+; RV32-NEXT:    vmul.vv v16, v8, v28
+; RV32-NEXT:    vmul.vv v28, v8, v4
+; RV32-NEXT:    vxor.vi v28, v28, 0
+; RV32-NEXT:    vxor.vv v28, v28, v16
+; RV32-NEXT:    vand.vx v16, v12, s5
+; RV32-NEXT:    addi s5, sp, 256
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw s6, 212(sp)
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v28, v28, v24
+; RV32-NEXT:    vand.vx v24, v12, s6
+; RV32-NEXT:    addi s6, sp, 248
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw s8, 204(sp)
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v28, v20
+; RV32-NEXT:    vand.vx v28, v12, s8
+; RV32-NEXT:    addi s8, sp, 240
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s1, 196(sp)
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v20, v20, v4
+; RV32-NEXT:    vand.vx v4, v12, s1
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw s7, 188(sp)
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v20, v20, v0
+; RV32-NEXT:    vand.vx v0, v12, s7
+; RV32-NEXT:    slli a0, s4, 11
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v20, v20, v16
+; RV32-NEXT:    vand.vx v16, v12, ra
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a0, 180(sp)
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw ra, 172(sp)
+; RV32-NEXT:    addi s4, sp, 216
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v24, v20, v24
+; RV32-NEXT:    vand.vx v20, v12, s11
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw s11, 164(sp)
+; RV32-NEXT:    addi s11, sp, 208
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v28, v24, v28
+; RV32-NEXT:    vand.vx v24, v12, s10
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw s10, 156(sp)
+; RV32-NEXT:    addi s10, sp, 200
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v4, v28, v4
+; RV32-NEXT:    vand.vx v28, v12, s9
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw s9, 148(sp)
+; RV32-NEXT:    addi s9, sp, 192
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v4, v4, v0
+; RV32-NEXT:    vand.vx v0, v12, a0
+; RV32-NEXT:    addi ra, sp, 184
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v0, v4, v0
+; RV32-NEXT:    vand.vx v4, v12, s0
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw s0, 140(sp)
+; RV32-NEXT:    addi s1, sp, 176
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v0, v0, v16
+; RV32-NEXT:    vand.vx v16, v12, t6
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t6, 132(sp)
+; RV32-NEXT:    addi s0, sp, 168
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v0, v0, v20
+; RV32-NEXT:    vand.vx v20, v12, t5
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t5, 124(sp)
+; RV32-NEXT:    addi t6, sp, 160
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    vand.vx v24, v12, t4
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t4, 116(sp)
+; RV32-NEXT:    addi t5, sp, 152
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v0, v0, v28
+; RV32-NEXT:    vand.vx v28, v12, t3
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t3, 108(sp)
+; RV32-NEXT:    addi t4, sp, 144
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    vand.vx v4, v12, t2
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw t2, 100(sp)
+; RV32-NEXT:    addi t3, sp, 136
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v0, v16
+; RV32-NEXT:    vand.vx v0, v12, t1
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw t1, 92(sp)
+; RV32-NEXT:    addi t2, sp, 128
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vand.vx v16, v12, t0
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw t0, 84(sp)
+; RV32-NEXT:    addi t1, sp, 120
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v24, v20, v24
+; RV32-NEXT:    vand.vx v20, v12, a7
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw a7, 76(sp)
+; RV32-NEXT:    addi t0, sp, 112
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v24, v24, v28
+; RV32-NEXT:    vand.vx v28, v12, a6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw a6, 68(sp)
+; RV32-NEXT:    addi a7, sp, 104
+; RV32-NEXT:    vmul.vv v28, v8, v4
+; RV32-NEXT:    vxor.vv v24, v24, v28
+; RV32-NEXT:    vand.vx v28, v12, a5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw a5, 60(sp)
+; RV32-NEXT:    addi a6, sp, 96
+; RV32-NEXT:    vmul.vv v28, v8, v0
+; RV32-NEXT:    vxor.vv v28, v24, v28
+; RV32-NEXT:    vand.vx v24, v12, a4
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw a4, 52(sp)
+; RV32-NEXT:    addi a5, sp, 88
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v16, v28, v16
+; RV32-NEXT:    vand.vx v28, v12, a3
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw a3, 44(sp)
+; RV32-NEXT:    addi a4, sp, 80
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v16, v16, v20
+; RV32-NEXT:    vand.vx v4, v12, a2
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    lui a1, 262144
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    sw a0, 20(sp)
+; RV32-NEXT:    addi a2, sp, 64
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vlse64.v v16, (s3), zero
+; RV32-NEXT:    addi s3, sp, 56
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vxor.vv v0, v20, v0
+; RV32-NEXT:    vlse64.v v20, (s2), zero
+; RV32-NEXT:    addi s2, sp, 48
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    vxor.vv v0, v0, v24
+; RV32-NEXT:    vlse64.v v24, (s5), zero
+; RV32-NEXT:    addi s5, sp, 40
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    vxor.vv v0, v0, v28
+; RV32-NEXT:    vlse64.v v28, (s6), zero
+; RV32-NEXT:    addi s6, sp, 32
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    vxor.vv v4, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v4, (s8), zero
+; RV32-NEXT:    addi s8, sp, 24
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s7, s7, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    addi s7, sp, 232
+; RV32-NEXT:    vlse64.v v16, (s7), zero
+; RV32-NEXT:    addi s7, sp, 224
+; RV32-NEXT:    vlse64.v v20, (s7), zero
+; RV32-NEXT:    vlse64.v v24, (s4), zero
+; RV32-NEXT:    vlse64.v v28, (s11), zero
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 4
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    mv s7, s4
+; RV32-NEXT:    slli s4, s4, 1
+; RV32-NEXT:    add s7, s7, s4
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    add s4, s4, s7
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    mv s7, s4
+; RV32-NEXT:    slli s4, s4, 4
+; RV32-NEXT:    add s4, s4, s7
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr s4, vlenb
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    mv s7, s4
+; RV32-NEXT:    slli s4, s4, 1
+; RV32-NEXT:    add s7, s7, s4
+; RV32-NEXT:    slli s4, s4, 1
+; RV32-NEXT:    add s7, s7, s4
+; RV32-NEXT:    slli s4, s4, 2
+; RV32-NEXT:    add s4, s4, s7
+; RV32-NEXT:    add s4, sp, s4
+; RV32-NEXT:    addi s4, s4, 288
+; RV32-NEXT:    vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v20, (s10), zero
+; RV32-NEXT:    vlse64.v v24, (s9), zero
+; RV32-NEXT:    vlse64.v v28, (ra), zero
+; RV32-NEXT:    vlse64.v v4, (s1), zero
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 2
+; RV32-NEXT:    mv s4, s1
+; RV32-NEXT:    slli s1, s1, 1
+; RV32-NEXT:    add s1, s1, s4
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 3
+; RV32-NEXT:    mv s4, s1
+; RV32-NEXT:    slli s1, s1, 2
+; RV32-NEXT:    add s1, s1, s4
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 6
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v4
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    slli s1, s1, 3
+; RV32-NEXT:    mv s4, s1
+; RV32-NEXT:    slli s1, s1, 1
+; RV32-NEXT:    add s4, s4, s1
+; RV32-NEXT:    slli s1, s1, 2
+; RV32-NEXT:    add s1, s1, s4
+; RV32-NEXT:    add s1, sp, s1
+; RV32-NEXT:    addi s1, s1, 288
+; RV32-NEXT:    vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v24, (s0), zero
+; RV32-NEXT:    vlse64.v v28, (t6), zero
+; RV32-NEXT:    vlse64.v v4, (t5), zero
+; RV32-NEXT:    vlse64.v v0, (t4), zero
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 3
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 3
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v4
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t5, t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t5, t5, t4
+; RV32-NEXT:    slli t4, t4, 1
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v0
+; RV32-NEXT:    csrr t4, vlenb
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    mv t5, t4
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    add t5, t5, t4
+; RV32-NEXT:    slli t4, t4, 2
+; RV32-NEXT:    add t4, t4, t5
+; RV32-NEXT:    add t4, sp, t4
+; RV32-NEXT:    addi t4, t4, 288
+; RV32-NEXT:    vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v28, (t3), zero
+; RV32-NEXT:    vlse64.v v4, (t2), zero
+; RV32-NEXT:    vlse64.v v0, (t1), zero
+; RV32-NEXT:    vlse64.v v16, (t0), zero
+; RV32-NEXT:    vand.vv v20, v12, v28
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 2
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v20, v12, v4
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 5
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v20, v12, v0
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 3
+; RV32-NEXT:    mv t1, t0
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    add t1, t1, t0
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    add t0, t0, t1
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    csrr t0, vlenb
+; RV32-NEXT:    slli t0, t0, 4
+; RV32-NEXT:    mv t1, t0
+; RV32-NEXT:    slli t0, t0, 2
+; RV32-NEXT:    add t0, t0, t1
+; RV32-NEXT:    add t0, sp, t0
+; RV32-NEXT:    addi t0, t0, 288
+; RV32-NEXT:    vs4r.v v16, (t0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v16, (a7), zero
+; RV32-NEXT:    vlse64.v v0, (a6), zero
+; RV32-NEXT:    vlse64.v v20, (a5), zero
+; RV32-NEXT:    vlse64.v v24, (a4), zero
+; RV32-NEXT:    vand.vv v4, v12, v16
+; RV32-NEXT:    vand.vv v16, v12, v0
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 2
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    add a5, a5, a4
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 288
+; RV32-NEXT:    vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (s3), zero
+; RV32-NEXT:    vlse64.v v28, (s2), zero
+; RV32-NEXT:    vand.vv v0, v12, v16
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 288
+; RV32-NEXT:    vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v24
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 288
+; RV32-NEXT:    vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v28
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 288
+; RV32-NEXT:    vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vlse64.v v16, (s5), zero
+; RV32-NEXT:    vlse64.v v20, (s6), zero
+; RV32-NEXT:    vlse64.v v24, (s8), zero
+; RV32-NEXT:    vlse64.v v28, (a0), zero
+; RV32-NEXT:    vand.vv v16, v12, v16
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v16, v12, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v24, v12, v24
+; RV32-NEXT:    vand.vv v20, v12, v28
+; RV32-NEXT:    vand.vx v12, v12, a1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v12, v16, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v16, v8, v4
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v16, v8, v0
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v16, v8, v16
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v16, v8, v24
+; RV32-NEXT:    vxor.vv v12, v12, v16
+; RV32-NEXT:    vmul.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v12, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmul_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vand.vi v16, v12, 2
+; RV64-NEXT:    vand.vi v20, v12, 1
+; RV64-NEXT:    vmul.vv v16, v8, v16
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v20, v16
+; RV64-NEXT:    vand.vi v20, v12, 4
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vi v20, v12, 8
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    li a0, 256
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a1, 512
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a2
+; RV64-NEXT:    slli a1, a0, 11
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 32
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 64
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 128
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 256
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 512
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 1024
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 2048
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 8192
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 16384
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 65536
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 131072
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    lui a1, 262144
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 31
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 33
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 34
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 35
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 36
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 37
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 38
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 39
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 40
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 41
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 42
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 43
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 44
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 45
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 46
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 47
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 50
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 52
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 54
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 55
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 56
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 58
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 60
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a1
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vand.vx v20, v12, a0
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v20, v8, v20
+; RV64-NEXT:    vxor.vv v16, v16, v20
+; RV64-NEXT:    vmul.vv v8, v8, v12
+; RV64-NEXT:    vxor.vv v8, v16, v8
+; RV64-NEXT:    ret
+  %a = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %x, <8 x i64> %y)
+  ret <8 x i64> %a
+}
+
+define <1 x i32> @clmulr_v1i32(<1 x i32> %x, <1 x i32> %y) nounwind {
+; CHECK-LABEL: clmulr_v1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a4, 16
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vsll.vi v11, v8, 24
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    lui a5, 349525
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    addi a3, a4, -256
+; CHECK-NEXT:    addi a2, a0, -241
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    addi a0, a5, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v11, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vand.vx v9, v8, a6
+; CHECK-NEXT:    li a5, 32
+; CHECK-NEXT:    vand.vx v10, v8, a5
+; CHECK-NEXT:    li a5, 64
+; CHECK-NEXT:    vand.vx v11, v8, a5
+; CHECK-NEXT:    li a5, 128
+; CHECK-NEXT:    vand.vx v12, v8, a5
+; CHECK-NEXT:    li a5, 256
+; CHECK-NEXT:    vand.vx v13, v8, a5
+; CHECK-NEXT:    li a5, 512
+; CHECK-NEXT:    vand.vx v14, v8, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:    vand.vx v15, v8, a5
+; CHECK-NEXT:    li a5, 1
+; CHECK-NEXT:    slli a5, a5, 11
+; CHECK-NEXT:    vand.vx v16, v8, a5
+; CHECK-NEXT:    lui a5, 1
+; CHECK-NEXT:    vand.vx v17, v8, a5
+; CHECK-NEXT:    lui a5, 2
+; CHECK-NEXT:    vand.vx v18, v8, a5
+; CHECK-NEXT:    lui a5, 4
+; CHECK-NEXT:    vand.vx v19, v8, a5
+; CHECK-NEXT:    lui a5, 8
+; CHECK-NEXT:    vand.vx v20, v8, a5
+; CHECK-NEXT:    lui a5, 32
+; CHECK-NEXT:    vand.vx v21, v8, a4
+; CHECK-NEXT:    lui a4, 64
+; CHECK-NEXT:    vand.vx v22, v8, a5
+; CHECK-NEXT:    lui a5, 128
+; CHECK-NEXT:    vand.vx v23, v8, a4
+; CHECK-NEXT:    lui a4, 256
+; CHECK-NEXT:    vand.vx v24, v8, a5
+; CHECK-NEXT:    lui a5, 512
+; CHECK-NEXT:    vand.vx v25, v8, a4
+; CHECK-NEXT:    lui a4, 1024
+; CHECK-NEXT:    vand.vx v26, v8, a5
+; CHECK-NEXT:    lui a5, 2048
+; CHECK-NEXT:    vand.vx v27, v8, a4
+; CHECK-NEXT:    lui a4, 4096
+; CHECK-NEXT:    vand.vx v28, v8, a5
+; CHECK-NEXT:    lui a5, 8192
+; CHECK-NEXT:    vand.vx v29, v8, a4
+; CHECK-NEXT:    lui a4, 16384
+; CHECK-NEXT:    vand.vx v30, v8, a5
+; CHECK-NEXT:    lui a5, 32768
+; CHECK-NEXT:    vand.vx v31, v8, a4
+; CHECK-NEXT:    lui a4, 65536
+; CHECK-NEXT:    vand.vx v7, v8, a5
+; CHECK-NEXT:    lui a5, 131072
+; CHECK-NEXT:    vand.vx v6, v8, a4
+; CHECK-NEXT:    lui a4, 262144
+; CHECK-NEXT:    vand.vx v5, v8, a5
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    vand.vi v4, v8, 2
+; CHECK-NEXT:    vand.vi v3, v8, 1
+; CHECK-NEXT:    vand.vi v2, v8, 4
+; CHECK-NEXT:    vand.vi v1, v8, 8
+; CHECK-NEXT:    vand.vx v0, v8, a4
+; CHECK-NEXT:    vmul.vv v4, v8, v4
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs1r.v v4, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vmul.vv v3, v8, v3
+; CHECK-NEXT:    vmul.vv v2, v8, v2
+; CHECK-NEXT:    vmul.vv v1, v8, v1
+; CHECK-NEXT:    vmul.vv v9, v8, v9
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v13, v8, v13
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vmul.vv v15, v8, v15
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vmul.vv v17, v8, v17
+; CHECK-NEXT:    vmul.vv v18, v8, v18
+; CHECK-NEXT:    vmul.vv v19, v8, v19
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vmul.vv v21, v8, v21
+; CHECK-NEXT:    vmul.vv v22, v8, v22
+; CHECK-NEXT:    vmul.vv v23, v8, v23
+; CHECK-NEXT:    vmul.vv v24, v8, v24
+; CHECK-NEXT:    vmul.vv v25, v8, v25
+; CHECK-NEXT:    vmul.vv v26, v8, v26
+; CHECK-NEXT:    vmul.vv v27, v8, v27
+; CHECK-NEXT:    vmul.vv v28, v8, v28
+; CHECK-NEXT:    vmul.vv v29, v8, v29
+; CHECK-NEXT:    vmul.vv v30, v8, v30
+; CHECK-NEXT:    vmul.vv v31, v8, v31
+; CHECK-NEXT:    vmul.vv v7, v8, v7
+; CHECK-NEXT:    vmul.vv v6, v8, v6
+; CHECK-NEXT:    vmul.vv v5, v8, v5
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vand.vx v4, v8, a5
+; CHECK-NEXT:    vmul.vv v8, v8, v4
+; CHECK-NEXT:    vl1r.v v4, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vxor.vv v4, v3, v4
+; CHECK-NEXT:    vxor.vv v4, v4, v2
+; CHECK-NEXT:    vxor.vv v4, v4, v1
+; CHECK-NEXT:    vxor.vv v9, v4, v9
+; CHECK-NEXT:    vxor.vv v9, v9, v10
+; CHECK-NEXT:    vxor.vv v9, v9, v11
+; CHECK-NEXT:    vxor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v9, v9, v13
+; CHECK-NEXT:    vxor.vv v9, v9, v14
+; CHECK-NEXT:    vxor.vv v9, v9, v15
+; CHECK-NEXT:    vxor.vv v9, v9, v16
+; CHECK-NEXT:    vxor.vv v9, v9, v17
+; CHECK-NEXT:    vxor.vv v9, v9, v18
+; CHECK-NEXT:    vxor.vv v9, v9, v19
+; CHECK-NEXT:    vxor.vv v9, v9, v20
+; CHECK-NEXT:    vxor.vv v9, v9, v21
+; CHECK-NEXT:    vxor.vv v9, v9, v22
+; CHECK-NEXT:    vxor.vv v9, v9, v23
+; CHECK-NEXT:    vxor.vv v9, v9, v24
+; CHECK-NEXT:    vxor.vv v9, v9, v25
+; CHECK-NEXT:    vxor.vv v9, v9, v26
+; CHECK-NEXT:    vxor.vv v9, v9, v27
+; CHECK-NEXT:    vxor.vv v9, v9, v28
+; CHECK-NEXT:    vxor.vv v9, v9, v29
+; CHECK-NEXT:    vxor.vv v9, v9, v30
+; CHECK-NEXT:    vxor.vv v9, v9, v31
+; CHECK-NEXT:    vxor.vv v9, v9, v7
+; CHECK-NEXT:    vxor.vv v9, v9, v6
+; CHECK-NEXT:    vxor.vv v9, v9, v5
+; CHECK-NEXT:    vxor.vv v9, v9, v0
+; CHECK-NEXT:    vxor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v10, v8, 24
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %a = call <1 x i32> @llvm.clmulr.v1i32(<1 x i32> %x, <1 x i32> %y)
+  ret <1 x i32> %a
+}
+
+define <2 x i32> @clmulr_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
+; CHECK-LABEL: clmulr_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a4, 16
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vsll.vi v11, v8, 24
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    lui a5, 349525
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    addi a3, a4, -256
+; CHECK-NEXT:    addi a2, a0, -241
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    addi a0, a5, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v11, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vand.vx v9, v8, a6
+; CHECK-NEXT:    li a5, 32
+; CHECK-NEXT:    vand.vx v10, v8, a5
+; CHECK-NEXT:    li a5, 64
+; CHECK-NEXT:    vand.vx v11, v8, a5
+; CHECK-NEXT:    li a5, 128
+; CHECK-NEXT:    vand.vx v12, v8, a5
+; CHECK-NEXT:    li a5, 256
+; CHECK-NEXT:    vand.vx v13, v8, a5
+; CHECK-NEXT:    li a5, 512
+; CHECK-NEXT:    vand.vx v14, v8, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:    vand.vx v15, v8, a5
+; CHECK-NEXT:    li a5, 1
+; CHECK-NEXT:    slli a5, a5, 11
+; CHECK-NEXT:    vand.vx v16, v8, a5
+; CHECK-NEXT:    lui a5, 1
+; CHECK-NEXT:    vand.vx v17, v8, a5
+; CHECK-NEXT:    lui a5, 2
+; CHECK-NEXT:    vand.vx v18, v8, a5
+; CHECK-NEXT:    lui a5, 4
+; CHECK-NEXT:    vand.vx v19, v8, a5
+; CHECK-NEXT:    lui a5, 8
+; CHECK-NEXT:    vand.vx v20, v8, a5
+; CHECK-NEXT:    lui a5, 32
+; CHECK-NEXT:    vand.vx v21, v8, a4
+; CHECK-NEXT:    lui a4, 64
+; CHECK-NEXT:    vand.vx v22, v8, a5
+; CHECK-NEXT:    lui a5, 128
+; CHECK-NEXT:    vand.vx v23, v8, a4
+; CHECK-NEXT:    lui a4, 256
+; CHECK-NEXT:    vand.vx v24, v8, a5
+; CHECK-NEXT:    lui a5, 512
+; CHECK-NEXT:    vand.vx v25, v8, a4
+; CHECK-NEXT:    lui a4, 1024
+; CHECK-NEXT:    vand.vx v26, v8, a5
+; CHECK-NEXT:    lui a5, 2048
+; CHECK-NEXT:    vand.vx v27, v8, a4
+; CHECK-NEXT:    lui a4, 4096
+; CHECK-NEXT:    vand.vx v28, v8, a5
+; CHECK-NEXT:    lui a5, 8192
+; CHECK-NEXT:    vand.vx v29, v8, a4
+; CHECK-NEXT:    lui a4, 16384
+; CHECK-NEXT:    vand.vx v30, v8, a5
+; CHECK-NEXT:    lui a5, 32768
+; CHECK-NEXT:    vand.vx v31, v8, a4
+; CHECK-NEXT:    lui a4, 65536
+; CHECK-NEXT:    vand.vx v7, v8, a5
+; CHECK-NEXT:    lui a5, 131072
+; CHECK-NEXT:    vand.vx v6, v8, a4
+; CHECK-NEXT:    lui a4, 262144
+; CHECK-NEXT:    vand.vx v5, v8, a5
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    vand.vi v4, v8, 2
+; CHECK-NEXT:    vand.vi v3, v8, 1
+; CHECK-NEXT:    vand.vi v2, v8, 4
+; CHECK-NEXT:    vand.vi v1, v8, 8
+; CHECK-NEXT:    vand.vx v0, v8, a4
+; CHECK-NEXT:    vmul.vv v4, v8, v4
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs1r.v v4, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vmul.vv v3, v8, v3
+; CHECK-NEXT:    vmul.vv v2, v8, v2
+; CHECK-NEXT:    vmul.vv v1, v8, v1
+; CHECK-NEXT:    vmul.vv v9, v8, v9
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v13, v8, v13
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vmul.vv v15, v8, v15
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vmul.vv v17, v8, v17
+; CHECK-NEXT:    vmul.vv v18, v8, v18
+; CHECK-NEXT:    vmul.vv v19, v8, v19
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vmul.vv v21, v8, v21
+; CHECK-NEXT:    vmul.vv v22, v8, v22
+; CHECK-NEXT:    vmul.vv v23, v8, v23
+; CHECK-NEXT:    vmul.vv v24, v8, v24
+; CHECK-NEXT:    vmul.vv v25, v8, v25
+; CHECK-NEXT:    vmul.vv v26, v8, v26
+; CHECK-NEXT:    vmul.vv v27, v8, v27
+; CHECK-NEXT:    vmul.vv v28, v8, v28
+; CHECK-NEXT:    vmul.vv v29, v8, v29
+; CHECK-NEXT:    vmul.vv v30, v8, v30
+; CHECK-NEXT:    vmul.vv v31, v8, v31
+; CHECK-NEXT:    vmul.vv v7, v8, v7
+; CHECK-NEXT:    vmul.vv v6, v8, v6
+; CHECK-NEXT:    vmul.vv v5, v8, v5
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vand.vx v4, v8, a5
+; CHECK-NEXT:    vmul.vv v8, v8, v4
+; CHECK-NEXT:    vl1r.v v4, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vxor.vv v4, v3, v4
+; CHECK-NEXT:    vxor.vv v4, v4, v2
+; CHECK-NEXT:    vxor.vv v4, v4, v1
+; CHECK-NEXT:    vxor.vv v9, v4, v9
+; CHECK-NEXT:    vxor.vv v9, v9, v10
+; CHECK-NEXT:    vxor.vv v9, v9, v11
+; CHECK-NEXT:    vxor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v9, v9, v13
+; CHECK-NEXT:    vxor.vv v9, v9, v14
+; CHECK-NEXT:    vxor.vv v9, v9, v15
+; CHECK-NEXT:    vxor.vv v9, v9, v16
+; CHECK-NEXT:    vxor.vv v9, v9, v17
+; CHECK-NEXT:    vxor.vv v9, v9, v18
+; CHECK-NEXT:    vxor.vv v9, v9, v19
+; CHECK-NEXT:    vxor.vv v9, v9, v20
+; CHECK-NEXT:    vxor.vv v9, v9, v21
+; CHECK-NEXT:    vxor.vv v9, v9, v22
+; CHECK-NEXT:    vxor.vv v9, v9, v23
+; CHECK-NEXT:    vxor.vv v9, v9, v24
+; CHECK-NEXT:    vxor.vv v9, v9, v25
+; CHECK-NEXT:    vxor.vv v9, v9, v26
+; CHECK-NEXT:    vxor.vv v9, v9, v27
+; CHECK-NEXT:    vxor.vv v9, v9, v28
+; CHECK-NEXT:    vxor.vv v9, v9, v29
+; CHECK-NEXT:    vxor.vv v9, v9, v30
+; CHECK-NEXT:    vxor.vv v9, v9, v31
+; CHECK-NEXT:    vxor.vv v9, v9, v7
+; CHECK-NEXT:    vxor.vv v9, v9, v6
+; CHECK-NEXT:    vxor.vv v9, v9, v5
+; CHECK-NEXT:    vxor.vv v9, v9, v0
+; CHECK-NEXT:    vxor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v10, v8, 24
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %a = call <2 x i32> @llvm.clmulr.v2i32(<2 x i32> %x, <2 x i32> %y)
+  ret <2 x i32> %a
+}
+
+define <4 x i32> @clmulr_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: clmulr_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a4, 16
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vsll.vi v11, v8, 24
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    lui a5, 349525
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    addi a3, a4, -256
+; CHECK-NEXT:    addi a2, a0, -241
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    addi a0, a5, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v11, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vand.vx v9, v8, a6
+; CHECK-NEXT:    li a5, 32
+; CHECK-NEXT:    vand.vx v10, v8, a5
+; CHECK-NEXT:    li a5, 64
+; CHECK-NEXT:    vand.vx v11, v8, a5
+; CHECK-NEXT:    li a5, 128
+; CHECK-NEXT:    vand.vx v12, v8, a5
+; CHECK-NEXT:    li a5, 256
+; CHECK-NEXT:    vand.vx v13, v8, a5
+; CHECK-NEXT:    li a5, 512
+; CHECK-NEXT:    vand.vx v14, v8, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:    vand.vx v15, v8, a5
+; CHECK-NEXT:    li a5, 1
+; CHECK-NEXT:    slli a5, a5, 11
+; CHECK-NEXT:    vand.vx v16, v8, a5
+; CHECK-NEXT:    lui a5, 1
+; CHECK-NEXT:    vand.vx v17, v8, a5
+; CHECK-NEXT:    lui a5, 2
+; CHECK-NEXT:    vand.vx v18, v8, a5
+; CHECK-NEXT:    lui a5, 4
+; CHECK-NEXT:    vand.vx v19, v8, a5
+; CHECK-NEXT:    lui a5, 8
+; CHECK-NEXT:    vand.vx v20, v8, a5
+; CHECK-NEXT:    lui a5, 32
+; CHECK-NEXT:    vand.vx v21, v8, a4
+; CHECK-NEXT:    lui a4, 64
+; CHECK-NEXT:    vand.vx v22, v8, a5
+; CHECK-NEXT:    lui a5, 128
+; CHECK-NEXT:    vand.vx v23, v8, a4
+; CHECK-NEXT:    lui a4, 256
+; CHECK-NEXT:    vand.vx v24, v8, a5
+; CHECK-NEXT:    lui a5, 512
+; CHECK-NEXT:    vand.vx v25, v8, a4
+; CHECK-NEXT:    lui a4, 1024
+; CHECK-NEXT:    vand.vx v26, v8, a5
+; CHECK-NEXT:    lui a5, 2048
+; CHECK-NEXT:    vand.vx v27, v8, a4
+; CHECK-NEXT:    lui a4, 4096
+; CHECK-NEXT:    vand.vx v28, v8, a5
+; CHECK-NEXT:    lui a5, 8192
+; CHECK-NEXT:    vand.vx v29, v8, a4
+; CHECK-NEXT:    lui a4, 16384
+; CHECK-NEXT:    vand.vx v30, v8, a5
+; CHECK-NEXT:    lui a5, 32768
+; CHECK-NEXT:    vand.vx v31, v8, a4
+; CHECK-NEXT:    lui a4, 65536
+; CHECK-NEXT:    vand.vx v7, v8, a5
+; CHECK-NEXT:    lui a5, 131072
+; CHECK-NEXT:    vand.vx v6, v8, a4
+; CHECK-NEXT:    lui a4, 262144
+; CHECK-NEXT:    vand.vx v5, v8, a5
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    vand.vi v4, v8, 2
+; CHECK-NEXT:    vand.vi v3, v8, 1
+; CHECK-NEXT:    vand.vi v2, v8, 4
+; CHECK-NEXT:    vand.vi v1, v8, 8
+; CHECK-NEXT:    vand.vx v0, v8, a4
+; CHECK-NEXT:    vmul.vv v4, v8, v4
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs1r.v v4, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vmul.vv v3, v8, v3
+; CHECK-NEXT:    vmul.vv v2, v8, v2
+; CHECK-NEXT:    vmul.vv v1, v8, v1
+; CHECK-NEXT:    vmul.vv v9, v8, v9
+; CHECK-NEXT:    vmul.vv v10, v8, v10
+; CHECK-NEXT:    vmul.vv v11, v8, v11
+; CHECK-NEXT:    vmul.vv v12, v8, v12
+; CHECK-NEXT:    vmul.vv v13, v8, v13
+; CHECK-NEXT:    vmul.vv v14, v8, v14
+; CHECK-NEXT:    vmul.vv v15, v8, v15
+; CHECK-NEXT:    vmul.vv v16, v8, v16
+; CHECK-NEXT:    vmul.vv v17, v8, v17
+; CHECK-NEXT:    vmul.vv v18, v8, v18
+; CHECK-NEXT:    vmul.vv v19, v8, v19
+; CHECK-NEXT:    vmul.vv v20, v8, v20
+; CHECK-NEXT:    vmul.vv v21, v8, v21
+; CHECK-NEXT:    vmul.vv v22, v8, v22
+; CHECK-NEXT:    vmul.vv v23, v8, v23
+; CHECK-NEXT:    vmul.vv v24, v8, v24
+; CHECK-NEXT:    vmul.vv v25, v8, v25
+; CHECK-NEXT:    vmul.vv v26, v8, v26
+; CHECK-NEXT:    vmul.vv v27, v8, v27
+; CHECK-NEXT:    vmul.vv v28, v8, v28
+; CHECK-NEXT:    vmul.vv v29, v8, v29
+; CHECK-NEXT:    vmul.vv v30, v8, v30
+; CHECK-NEXT:    vmul.vv v31, v8, v31
+; CHECK-NEXT:    vmul.vv v7, v8, v7
+; CHECK-NEXT:    vmul.vv v6, v8, v6
+; CHECK-NEXT:    vmul.vv v5, v8, v5
+; CHECK-NEXT:    vmul.vv v0, v8, v0
+; CHECK-NEXT:    vand.vx v4, v8, a5
+; CHECK-NEXT:    vmul.vv v8, v8, v4
+; CHECK-NEXT:    vl1r.v v4, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vxor.vv v4, v3, v4
+; CHECK-NEXT:    vxor.vv v4, v4, v2
+; CHECK-NEXT:    vxor.vv v4, v4, v1
+; CHECK-NEXT:    vxor.vv v9, v4, v9
+; CHECK-NEXT:    vxor.vv v9, v9, v10
+; CHECK-NEXT:    vxor.vv v9, v9, v11
+; CHECK-NEXT:    vxor.vv v9, v9, v12
+; CHECK-NEXT:    vxor.vv v9, v9, v13
+; CHECK-NEXT:    vxor.vv v9, v9, v14
+; CHECK-NEXT:    vxor.vv v9, v9, v15
+; CHECK-NEXT:    vxor.vv v9, v9, v16
+; CHECK-NEXT:    vxor.vv v9, v9, v17
+; CHECK-NEXT:    vxor.vv v9, v9, v18
+; CHECK-NEXT:    vxor.vv v9, v9, v19
+; CHECK-NEXT:    vxor.vv v9, v9, v20
+; CHECK-NEXT:    vxor.vv v9, v9, v21
+; CHECK-NEXT:    vxor.vv v9, v9, v22
+; CHECK-NEXT:    vxor.vv v9, v9, v23
+; CHECK-NEXT:    vxor.vv v9, v9, v24
+; CHECK-NEXT:    vxor.vv v9, v9, v25
+; CHECK-NEXT:    vxor.vv v9, v9, v26
+; CHECK-NEXT:    vxor.vv v9, v9, v27
+; CHECK-NEXT:    vxor.vv v9, v9, v28
+; CHECK-NEXT:    vxor.vv v9, v9, v29
+; CHECK-NEXT:    vxor.vv v9, v9, v30
+; CHECK-NEXT:    vxor.vv v9, v9, v31
+; CHECK-NEXT:    vxor.vv v9, v9, v7
+; CHECK-NEXT:    vxor.vv v9, v9, v6
+; CHECK-NEXT:    vxor.vv v9, v9, v5
+; CHECK-NEXT:    vxor.vv v9, v9, v0
+; CHECK-NEXT:    vxor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vand.vx v9, v9, a3
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vsll.vi v10, v8, 24
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %a = call <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i32> %a
+}
+
+define <8 x i32> @clmulr_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
+; RV32-LABEL: clmulr_v8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -64
+; RV32-NEXT:    sw s0, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v8, 8
+; RV32-NEXT:    lui a0, 16
+; RV32-NEXT:    vsrl.vi v12, v8, 24
+; RV32-NEXT:    vsll.vi v14, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    lui a2, 209715
+; RV32-NEXT:    lui s6, 349525
+; RV32-NEXT:    li t2, 16
+; RV32-NEXT:    li t6, 32
+; RV32-NEXT:    li s3, 64
+; RV32-NEXT:    li s5, 128
+; RV32-NEXT:    li s4, 256
+; RV32-NEXT:    li s2, 512
+; RV32-NEXT:    li s1, 1024
+; RV32-NEXT:    li s0, 1
+; RV32-NEXT:    lui t5, 1
+; RV32-NEXT:    lui t4, 2
+; RV32-NEXT:    lui t3, 4
+; RV32-NEXT:    lui a5, 8
+; RV32-NEXT:    lui a6, 32
+; RV32-NEXT:    lui a7, 64
+; RV32-NEXT:    lui t0, 128
+; RV32-NEXT:    lui t1, 256
+; RV32-NEXT:    addi a4, a0, -256
+; RV32-NEXT:    addi a3, a1, -241
+; RV32-NEXT:    addi a2, a2, 819
+; RV32-NEXT:    addi a1, s6, 1365
+; RV32-NEXT:    vand.vx v10, v10, a4
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vor.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v14, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v10, v10, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v10, v10, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v10, v10, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vand.vx v10, v8, t2
+; RV32-NEXT:    lui t2, 512
+; RV32-NEXT:    vand.vx v12, v8, t6
+; RV32-NEXT:    lui t6, 1024
+; RV32-NEXT:    vand.vx v14, v8, s3
+; RV32-NEXT:    lui s3, 2048
+; RV32-NEXT:    vand.vx v16, v8, s5
+; RV32-NEXT:    lui s5, 4096
+; RV32-NEXT:    vand.vx v26, v8, s4
+; RV32-NEXT:    lui s4, 8192
+; RV32-NEXT:    vand.vx v28, v8, s2
+; RV32-NEXT:    lui s2, 16384
+; RV32-NEXT:    vand.vx v18, v8, s1
+; RV32-NEXT:    lui s1, 32768
+; RV32-NEXT:    slli s0, s0, 11
+; RV32-NEXT:    vand.vx v20, v8, s0
+; RV32-NEXT:    lui s0, 65536
+; RV32-NEXT:    vand.vx v22, v8, t5
+; RV32-NEXT:    lui t5, 131072
+; RV32-NEXT:    vand.vx v24, v8, t4
+; RV32-NEXT:    lui t4, 262144
+; RV32-NEXT:    vand.vx v30, v8, t3
+; RV32-NEXT:    lui t3, 524288
+; RV32-NEXT:    vand.vi v6, v8, 2
+; RV32-NEXT:    vand.vi v4, v8, 1
+; RV32-NEXT:    vand.vi v2, v8, 4
+; RV32-NEXT:    vand.vi v0, v8, 8
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v6, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v6, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v6, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v26
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v18
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v22
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, s6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v30
+; RV32-NEXT:    csrr s6, vlenb
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    slli s6, s6, 1
+; RV32-NEXT:    add s6, s6, a0
+; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s6, sp, s6
+; RV32-NEXT:    addi s6, s6, 32
+; RV32-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 32
+; RV32-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a7
+; RV32-NEXT:    vmul.vv v6, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t0
+; RV32-NEXT:    vmul.vv v30, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t1
+; RV32-NEXT:    vmul.vv v28, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t2
+; RV32-NEXT:    vmul.vv v26, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t6
+; RV32-NEXT:    vmul.vv v24, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s3
+; RV32-NEXT:    vmul.vv v22, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s5
+; RV32-NEXT:    vmul.vv v20, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s4
+; RV32-NEXT:    vmul.vv v18, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s2
+; RV32-NEXT:    vmul.vv v16, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s1
+; RV32-NEXT:    vmul.vv v14, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, s0
+; RV32-NEXT:    vmul.vv v12, v8, v10
+; RV32-NEXT:    vand.vx v10, v8, t5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    vand.vx v0, v8, t4
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    vand.vx v2, v8, t3
+; RV32-NEXT:    vmul.vv v8, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v4, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v2, v2, v4
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v4, v2, v4
+; RV32-NEXT:    vxor.vv v6, v4, v6
+; RV32-NEXT:    vxor.vv v30, v6, v30
+; RV32-NEXT:    vxor.vv v28, v30, v28
+; RV32-NEXT:    vxor.vv v26, v28, v26
+; RV32-NEXT:    vxor.vv v24, v26, v24
+; RV32-NEXT:    vxor.vv v22, v24, v22
+; RV32-NEXT:    vxor.vv v20, v22, v20
+; RV32-NEXT:    vxor.vv v18, v20, v18
+; RV32-NEXT:    vxor.vv v16, v18, v16
+; RV32-NEXT:    vxor.vv v14, v16, v14
+; RV32-NEXT:    vxor.vv v12, v14, v12
+; RV32-NEXT:    vxor.vv v10, v12, v10
+; RV32-NEXT:    vxor.vv v10, v10, v0
+; RV32-NEXT:    vxor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 8
+; RV32-NEXT:    vsrl.vi v12, v8, 24
+; RV32-NEXT:    vand.vx v10, v10, a4
+; RV32-NEXT:    vor.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v12, v8, 24
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v10, v10, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v10, v10, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v10, v10, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 64
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_v8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -96
+; RV64-NEXT:    sd s0, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vsrl.vi v10, v8, 8
+; RV64-NEXT:    lui a0, 16
+; RV64-NEXT:    vsrl.vi v12, v8, 24
+; RV64-NEXT:    vsll.vi v14, v8, 24
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    lui a2, 209715
+; RV64-NEXT:    lui s6, 349525
+; RV64-NEXT:    li t2, 16
+; RV64-NEXT:    li t6, 32
+; RV64-NEXT:    li s3, 64
+; RV64-NEXT:    li s5, 128
+; RV64-NEXT:    li s4, 256
+; RV64-NEXT:    li s2, 512
+; RV64-NEXT:    li s1, 1024
+; RV64-NEXT:    li s0, 1
+; RV64-NEXT:    lui t5, 1
+; RV64-NEXT:    lui t4, 2
+; RV64-NEXT:    lui t3, 4
+; RV64-NEXT:    lui a5, 8
+; RV64-NEXT:    lui a6, 32
+; RV64-NEXT:    lui a7, 64
+; RV64-NEXT:    lui t0, 128
+; RV64-NEXT:    lui t1, 256
+; RV64-NEXT:    addi a4, a0, -256
+; RV64-NEXT:    addi a3, a1, -241
+; RV64-NEXT:    addi a2, a2, 819
+; RV64-NEXT:    addi a1, s6, 1365
+; RV64-NEXT:    vand.vx v10, v10, a4
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vor.vv v10, v10, v12
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v10, v10, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v10, v10, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vand.vx v10, v8, t2
+; RV64-NEXT:    lui t2, 512
+; RV64-NEXT:    vand.vx v12, v8, t6
+; RV64-NEXT:    lui t6, 1024
+; RV64-NEXT:    vand.vx v14, v8, s3
+; RV64-NEXT:    lui s3, 2048
+; RV64-NEXT:    vand.vx v16, v8, s5
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v26, v8, s4
+; RV64-NEXT:    lui s4, 8192
+; RV64-NEXT:    vand.vx v28, v8, s2
+; RV64-NEXT:    lui s2, 16384
+; RV64-NEXT:    vand.vx v18, v8, s1
+; RV64-NEXT:    lui s1, 32768
+; RV64-NEXT:    slli s0, s0, 11
+; RV64-NEXT:    vand.vx v20, v8, s0
+; RV64-NEXT:    lui s0, 65536
+; RV64-NEXT:    vand.vx v22, v8, t5
+; RV64-NEXT:    lui t5, 131072
+; RV64-NEXT:    vand.vx v24, v8, t4
+; RV64-NEXT:    lui t4, 262144
+; RV64-NEXT:    vand.vx v30, v8, t3
+; RV64-NEXT:    lui t3, 524288
+; RV64-NEXT:    vand.vi v6, v8, 2
+; RV64-NEXT:    vand.vi v4, v8, 1
+; RV64-NEXT:    vand.vi v2, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v6, v8, v6
+; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v14
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v26
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v18
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v20
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v22
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, s6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v30
+; RV64-NEXT:    csrr s6, vlenb
+; RV64-NEXT:    slli s6, s6, 1
+; RV64-NEXT:    mv a0, s6
+; RV64-NEXT:    slli s6, s6, 1
+; RV64-NEXT:    add s6, s6, a0
+; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s6, sp, s6
+; RV64-NEXT:    addi s6, s6, 32
+; RV64-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a6
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, a7
+; RV64-NEXT:    vmul.vv v6, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t0
+; RV64-NEXT:    vmul.vv v30, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t1
+; RV64-NEXT:    vmul.vv v28, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t2
+; RV64-NEXT:    vmul.vv v26, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t6
+; RV64-NEXT:    vmul.vv v24, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s3
+; RV64-NEXT:    vmul.vv v22, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v20, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s4
+; RV64-NEXT:    vmul.vv v18, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s2
+; RV64-NEXT:    vmul.vv v16, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s1
+; RV64-NEXT:    vmul.vv v14, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s0
+; RV64-NEXT:    vmul.vv v12, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vand.vx v0, v8, t4
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vand.vx v2, v8, t3
+; RV64-NEXT:    vmul.vv v8, v8, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v4, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v2, v4
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v4, v2, v4
+; RV64-NEXT:    vxor.vv v6, v4, v6
+; RV64-NEXT:    vxor.vv v30, v6, v30
+; RV64-NEXT:    vxor.vv v28, v30, v28
+; RV64-NEXT:    vxor.vv v26, v28, v26
+; RV64-NEXT:    vxor.vv v24, v26, v24
+; RV64-NEXT:    vxor.vv v22, v24, v22
+; RV64-NEXT:    vxor.vv v20, v22, v20
+; RV64-NEXT:    vxor.vv v18, v20, v18
+; RV64-NEXT:    vxor.vv v16, v18, v16
+; RV64-NEXT:    vxor.vv v14, v16, v14
+; RV64-NEXT:    vxor.vv v12, v14, v12
+; RV64-NEXT:    vxor.vv v10, v12, v10
+; RV64-NEXT:    vxor.vv v10, v10, v0
+; RV64-NEXT:    vxor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 8
+; RV64-NEXT:    vsrl.vi v12, v8, 24
+; RV64-NEXT:    vand.vx v10, v10, a4
+; RV64-NEXT:    vor.vv v10, v10, v12
+; RV64-NEXT:    vsll.vi v12, v8, 24
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v10, v10, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v10, v10, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    ret
+  %a = call <8 x i32> @llvm.clmulr.v8i32(<8 x i32> %x, <8 x i32> %x)
+  ret <8 x i32> %a
+}
+
+define <16 x i32> @clmulr_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
+; RV32-LABEL: clmulr_v16i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -80
+; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v8, 8
+; RV32-NEXT:    lui a5, 16
+; RV32-NEXT:    vsrl.vi v16, v8, 24
+; RV32-NEXT:    vsll.vi v20, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    lui a2, 209715
+; RV32-NEXT:    lui ra, 349525
+; RV32-NEXT:    li s11, 16
+; RV32-NEXT:    li s10, 32
+; RV32-NEXT:    li s9, 64
+; RV32-NEXT:    li a7, 512
+; RV32-NEXT:    li t0, 1024
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    lui t1, 1
+; RV32-NEXT:    lui t2, 2
+; RV32-NEXT:    lui t3, 4
+; RV32-NEXT:    lui t4, 8
+; RV32-NEXT:    lui t5, 32
+; RV32-NEXT:    lui t6, 64
+; RV32-NEXT:    lui s0, 128
+; RV32-NEXT:    lui s1, 256
+; RV32-NEXT:    lui s2, 512
+; RV32-NEXT:    lui s3, 1024
+; RV32-NEXT:    lui s4, 2048
+; RV32-NEXT:    lui s5, 4096
+; RV32-NEXT:    lui s6, 8192
+; RV32-NEXT:    lui s7, 16384
+; RV32-NEXT:    lui s8, 32768
+; RV32-NEXT:    addi a4, a5, -256
+; RV32-NEXT:    addi a3, a1, -241
+; RV32-NEXT:    addi a2, a2, 819
+; RV32-NEXT:    addi a1, ra, 1365
+; RV32-NEXT:    vand.vx v12, v12, a4
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v20, v8
+; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v12, v12, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v12, v12, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v12, v12, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vand.vx v12, v8, s11
+; RV32-NEXT:    lui s11, 65536
+; RV32-NEXT:    vand.vx v16, v8, s10
+; RV32-NEXT:    lui s10, 131072
+; RV32-NEXT:    vand.vx v20, v8, s9
+; RV32-NEXT:    lui s9, 262144
+; RV32-NEXT:    slli ra, a0, 11
+; RV32-NEXT:    vand.vi v24, v8, 2
+; RV32-NEXT:    vand.vi v28, v8, 1
+; RV32-NEXT:    vand.vi v4, v8, 4
+; RV32-NEXT:    vand.vi v0, v8, 8
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v24, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a4, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a4, a4, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    vand.vx v12, v8, a6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    mv a6, a4
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, a4, a6
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a6, 256
+; RV32-NEXT:    vand.vx v12, v8, a6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a4, a6
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    add a6, a6, a4
+; RV32-NEXT:    lw a4, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, a7
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 6
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, ra
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 4
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 1
+; RV32-NEXT:    add a7, a7, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a6, vlenb
+; RV32-NEXT:    slli a6, a6, 3
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    slli a6, a6, 2
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a6, sp, a6
+; RV32-NEXT:    addi a6, a6, 16
+; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, a5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 5
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a6, a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 4
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    mv a6, a5
+; RV32-NEXT:    slli a5, a5, 1
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 3
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a5, vlenb
+; RV32-NEXT:    slli a5, a5, 2
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    addi a5, a5, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    addi a5, sp, 16
+; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s7
+; RV32-NEXT:    vmul.vv v28, v8, v12
+; RV32-NEXT:    vand.vx v12, v8, s8
+; RV32-NEXT:    vmul.vv v24, v8, v12
+; RV32-NEXT:    vand.vx v12, v8, s11
+; RV32-NEXT:    vmul.vv v20, v8, v12
+; RV32-NEXT:    vand.vx v12, v8, s10
+; RV32-NEXT:    vmul.vv v16, v8, v12
+; RV32-NEXT:    vand.vx v12, v8, s9
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    vand.vx v0, v8, a0
+; RV32-NEXT:    vmul.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v4, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a5, a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v0, v0, v4
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v4, v0, v4
+; RV32-NEXT:    vxor.vv v28, v4, v28
+; RV32-NEXT:    vxor.vv v24, v28, v24
+; RV32-NEXT:    vxor.vv v20, v24, v20
+; RV32-NEXT:    vxor.vv v16, v20, v16
+; RV32-NEXT:    vxor.vv v12, v16, v12
+; RV32-NEXT:    vxor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 8
+; RV32-NEXT:    vsrl.vi v16, v8, 24
+; RV32-NEXT:    vand.vx v12, v12, a4
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v16, v8, 24
+; RV32-NEXT:    vand.vx v8, v8, a4
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vand.vx v12, v12, a3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 2
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vand.vx v12, v12, a2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vx v8, v8, a1
+; RV32-NEXT:    vand.vx v12, v12, a1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 80
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_v16i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -144
+; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    lui a5, 16
+; RV64-NEXT:    vsrl.vi v16, v8, 24
+; RV64-NEXT:    vsll.vi v20, v8, 24
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    lui a2, 209715
+; RV64-NEXT:    lui ra, 349525
+; RV64-NEXT:    li s11, 16
+; RV64-NEXT:    li s10, 32
+; RV64-NEXT:    li s9, 64
+; RV64-NEXT:    li a7, 512
+; RV64-NEXT:    li t0, 1024
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    lui t1, 1
+; RV64-NEXT:    lui t2, 2
+; RV64-NEXT:    lui t3, 4
+; RV64-NEXT:    lui t4, 8
+; RV64-NEXT:    lui t5, 32
+; RV64-NEXT:    lui t6, 64
+; RV64-NEXT:    lui s0, 128
+; RV64-NEXT:    lui s1, 256
+; RV64-NEXT:    lui s2, 512
+; RV64-NEXT:    lui s3, 1024
+; RV64-NEXT:    lui s4, 2048
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    lui s6, 8192
+; RV64-NEXT:    lui s7, 16384
+; RV64-NEXT:    lui s8, 32768
+; RV64-NEXT:    addi a4, a5, -256
+; RV64-NEXT:    addi a3, a1, -241
+; RV64-NEXT:    addi a2, a2, 819
+; RV64-NEXT:    addi a1, ra, 1365
+; RV64-NEXT:    vand.vx v12, v12, a4
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vor.vv v12, v12, v16
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v20, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v12, v12, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v12, v12, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vand.vx v12, v8, s11
+; RV64-NEXT:    lui s11, 65536
+; RV64-NEXT:    vand.vx v16, v8, s10
+; RV64-NEXT:    lui s10, 131072
+; RV64-NEXT:    vand.vx v20, v8, s9
+; RV64-NEXT:    lui s9, 262144
+; RV64-NEXT:    slli ra, a0, 11
+; RV64-NEXT:    vand.vi v24, v8, 2
+; RV64-NEXT:    vand.vi v28, v8, 1
+; RV64-NEXT:    vand.vi v4, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    sd a4, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v28
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v16
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v20
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a4, a4, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui a0, 524288
+; RV64-NEXT:    li a6, 128
+; RV64-NEXT:    vand.vx v12, v8, a6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    mv a6, a4
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, a4, a6
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 32
+; RV64-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li a6, 256
+; RV64-NEXT:    vand.vx v12, v8, a6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a4, a6
+; RV64-NEXT:    slli a6, a6, 4
+; RV64-NEXT:    add a6, a6, a4
+; RV64-NEXT:    ld a4, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a7
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 6
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, ra
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 4
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 1
+; RV64-NEXT:    add a7, a7, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a6, vlenb
+; RV64-NEXT:    slli a6, a6, 3
+; RV64-NEXT:    mv a7, a6
+; RV64-NEXT:    slli a6, a6, 2
+; RV64-NEXT:    add a6, a6, a7
+; RV64-NEXT:    add a6, sp, a6
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a6, a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv a6, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    addi a5, sp, 32
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s7
+; RV64-NEXT:    vmul.vv v28, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, s8
+; RV64-NEXT:    vmul.vv v24, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, s11
+; RV64-NEXT:    vmul.vv v20, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, s10
+; RV64-NEXT:    vmul.vv v16, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, s9
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vand.vx v0, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v4, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a5, a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a5, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    addi a0, sp, 32
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v4, v0, v4
+; RV64-NEXT:    vxor.vv v28, v4, v28
+; RV64-NEXT:    vxor.vv v24, v28, v24
+; RV64-NEXT:    vxor.vv v20, v24, v20
+; RV64-NEXT:    vxor.vv v16, v20, v16
+; RV64-NEXT:    vxor.vv v12, v16, v12
+; RV64-NEXT:    vxor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    vsrl.vi v16, v8, 24
+; RV64-NEXT:    vand.vx v12, v12, a4
+; RV64-NEXT:    vor.vv v12, v12, v16
+; RV64-NEXT:    vsll.vi v16, v8, 24
+; RV64-NEXT:    vand.vx v8, v8, a4
+; RV64-NEXT:    vsll.vi v8, v8, 8
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vand.vx v12, v12, a3
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vx v12, v12, a2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 144
+; RV64-NEXT:    ret
+  %a = call <16 x i32> @llvm.clmulr.v16i32(<16 x i32> %x, <16 x i32> %y)
+  ret <16 x i32> %a
+}
+
+define <1 x i64> @clmulr_v1i64(<1 x i64> %x, <1 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_v1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s7, 1044480
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    li s11, 1
+; RV32-NEXT:    li s8, 2
+; RV32-NEXT:    li s9, 4
+; RV32-NEXT:    li s10, 8
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    li a5, 64
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    li ra, 256
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    li a1, 1024
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    lui t0, 2
+; RV32-NEXT:    lui t1, 4
+; RV32-NEXT:    lui t2, 8
+; RV32-NEXT:    lui t3, 16
+; RV32-NEXT:    lui t4, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t6, 128
+; RV32-NEXT:    lui s0, 256
+; RV32-NEXT:    lui s1, 512
+; RV32-NEXT:    lui s2, 1024
+; RV32-NEXT:    lui s3, 2048
+; RV32-NEXT:    lui s4, 4096
+; RV32-NEXT:    lui s5, 8192
+; RV32-NEXT:    lui s6, 16384
+; RV32-NEXT:    sw s7, 272(sp)
+; RV32-NEXT:    lui s7, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw a7, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw s11, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw s8, 252(sp)
+; RV32-NEXT:    lui s8, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s9, 244(sp)
+; RV32-NEXT:    lui s9, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s10, 236(sp)
+; RV32-NEXT:    lui s10, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw a3, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw a4, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw a5, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw a6, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw ra, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw a0, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a1, 180(sp)
+; RV32-NEXT:    slli s11, s11, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw s11, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a2, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw t0, 156(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw t1, 148(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw t2, 140(sp)
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t3, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t4, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t5, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t6, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s0, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s1, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s2, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s3, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s4, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s5, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s6, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s7, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw s8, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw s9, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw s10, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw a7, 12(sp)
+; RV32-NEXT:    lui a0, 61681
+; RV32-NEXT:    addi a0, a0, -241
+; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v3, a0
+; RV32-NEXT:    lui a0, 209715
+; RV32-NEXT:    addi a0, a0, 819
+; RV32-NEXT:    vmv.v.x v2, a0
+; RV32-NEXT:    lui a0, 349525
+; RV32-NEXT:    addi a0, a0, 1365
+; RV32-NEXT:    vmv.v.x v1, a0
+; RV32-NEXT:    addi a0, sp, 272
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v0, (a0), zero
+; RV32-NEXT:    addi a0, sp, 264
+; RV32-NEXT:    vlse64.v v13, (a0), zero
+; RV32-NEXT:    addi a0, sp, 256
+; RV32-NEXT:    vlse64.v v14, (a0), zero
+; RV32-NEXT:    addi a0, sp, 248
+; RV32-NEXT:    vlse64.v v15, (a0), zero
+; RV32-NEXT:    addi a0, sp, 240
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v17, (a0), zero
+; RV32-NEXT:    addi a0, sp, 224
+; RV32-NEXT:    vlse64.v v18, (a0), zero
+; RV32-NEXT:    addi a0, sp, 216
+; RV32-NEXT:    vlse64.v v19, (a0), zero
+; RV32-NEXT:    addi a0, sp, 208
+; RV32-NEXT:    vlse64.v v20, (a0), zero
+; RV32-NEXT:    addi a0, sp, 200
+; RV32-NEXT:    vlse64.v v21, (a0), zero
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vlse64.v v22, (a0), zero
+; RV32-NEXT:    addi a0, sp, 184
+; RV32-NEXT:    vlse64.v v23, (a0), zero
+; RV32-NEXT:    addi a0, sp, 176
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    addi a0, sp, 168
+; RV32-NEXT:    vlse64.v v25, (a0), zero
+; RV32-NEXT:    addi a0, sp, 160
+; RV32-NEXT:    vlse64.v v26, (a0), zero
+; RV32-NEXT:    addi a0, sp, 152
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    addi a0, sp, 144
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    addi a0, sp, 136
+; RV32-NEXT:    vlse64.v v29, (a0), zero
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    vlse64.v v30, (a0), zero
+; RV32-NEXT:    addi a0, sp, 120
+; RV32-NEXT:    vlse64.v v31, (a0), zero
+; RV32-NEXT:    addi a0, sp, 112
+; RV32-NEXT:    vlse64.v v11, (a0), zero
+; RV32-NEXT:    addi a0, sp, 104
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    vlse64.v v5, (a0), zero
+; RV32-NEXT:    addi a0, sp, 88
+; RV32-NEXT:    vlse64.v v4, (a0), zero
+; RV32-NEXT:    li a6, 56
+; RV32-NEXT:    vsrl.vi v27, v8, 24
+; RV32-NEXT:    vsrl.vx v28, v8, a6
+; RV32-NEXT:    li ra, 40
+; RV32-NEXT:    vsrl.vx v7, v8, ra
+; RV32-NEXT:    vsll.vx v6, v8, a6
+; RV32-NEXT:    addi a4, t3, -256
+; RV32-NEXT:    vand.vx v7, v7, a4
+; RV32-NEXT:    vor.vv v28, v7, v28
+; RV32-NEXT:    vand.vx v7, v8, a4
+; RV32-NEXT:    vsll.vx v7, v7, ra
+; RV32-NEXT:    vor.vv v7, v6, v7
+; RV32-NEXT:    vsrl.vi v6, v8, 8
+; RV32-NEXT:    lui a5, 4080
+; RV32-NEXT:    vand.vx v27, v27, a5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v6, v6, v0
+; RV32-NEXT:    vor.vv v27, v6, v27
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    vlse64.v v6, (a3), zero
+; RV32-NEXT:    vor.vv v27, v27, v28
+; RV32-NEXT:    vand.vx v28, v8, a5
+; RV32-NEXT:    vsll.vi v28, v28, 24
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v28, v8
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    vlse64.v v28, (a3), zero
+; RV32-NEXT:    vor.vv v8, v7, v8
+; RV32-NEXT:    addi a3, sp, 64
+; RV32-NEXT:    vlse64.v v7, (a3), zero
+; RV32-NEXT:    vor.vv v8, v8, v27
+; RV32-NEXT:    vsrl.vi v27, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v3
+; RV32-NEXT:    vand.vv v27, v27, v3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    vsrl.vi v27, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v2
+; RV32-NEXT:    vand.vv v27, v27, v2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    vsrl.vi v27, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v1
+; RV32-NEXT:    vand.vv v27, v27, v1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    addi a3, sp, 56
+; RV32-NEXT:    vlse64.v v27, (a3), zero
+; RV32-NEXT:    vand.vv v13, v8, v13
+; RV32-NEXT:    vand.vv v14, v8, v14
+; RV32-NEXT:    vand.vv v15, v8, v15
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    vand.vv v17, v8, v17
+; RV32-NEXT:    vand.vv v18, v8, v18
+; RV32-NEXT:    vand.vv v19, v8, v19
+; RV32-NEXT:    vand.vv v20, v8, v20
+; RV32-NEXT:    vand.vv v21, v8, v21
+; RV32-NEXT:    vand.vv v22, v8, v22
+; RV32-NEXT:    vand.vv v23, v8, v23
+; RV32-NEXT:    vand.vv v24, v8, v24
+; RV32-NEXT:    vand.vv v25, v8, v25
+; RV32-NEXT:    vand.vv v26, v8, v26
+; RV32-NEXT:    vand.vv v3, v8, v9
+; RV32-NEXT:    vand.vv v2, v8, v10
+; RV32-NEXT:    vand.vv v29, v8, v29
+; RV32-NEXT:    vand.vv v30, v8, v30
+; RV32-NEXT:    vand.vv v31, v8, v31
+; RV32-NEXT:    vand.vv v0, v8, v11
+; RV32-NEXT:    vand.vv v9, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v5, v8, v5
+; RV32-NEXT:    vand.vv v4, v8, v4
+; RV32-NEXT:    vand.vv v6, v8, v6
+; RV32-NEXT:    vand.vv v9, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi a3, sp, 48
+; RV32-NEXT:    addi a0, sp, 40
+; RV32-NEXT:    vlse64.v v9, (a3), zero
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vand.vv v11, v8, v7
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v11, v8, v27
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi a2, sp, 32
+; RV32-NEXT:    addi a3, sp, 24
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a2), zero
+; RV32-NEXT:    vlse64.v v10, (a3), zero
+; RV32-NEXT:    vlse64.v v11, (a1), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vand.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 8
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s11
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t3
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t5
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t6
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s3
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s5
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s6
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s7
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s8
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s9
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v1, v8, s10
+; RV32-NEXT:    vmul.vv v1, v8, v1
+; RV32-NEXT:    vmul.vv v9, v8, v13
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v14
+; RV32-NEXT:    vmul.vv v11, v8, v15
+; RV32-NEXT:    vmul.vv v12, v8, v16
+; RV32-NEXT:    vmul.vv v13, v8, v17
+; RV32-NEXT:    vmul.vv v14, v8, v18
+; RV32-NEXT:    vmul.vv v15, v8, v19
+; RV32-NEXT:    vmul.vv v16, v8, v20
+; RV32-NEXT:    vmul.vv v17, v8, v21
+; RV32-NEXT:    vmul.vv v18, v8, v22
+; RV32-NEXT:    vmul.vv v19, v8, v23
+; RV32-NEXT:    vmul.vv v20, v8, v24
+; RV32-NEXT:    vmul.vv v21, v8, v25
+; RV32-NEXT:    vmul.vv v22, v8, v26
+; RV32-NEXT:    vmul.vv v23, v8, v3
+; RV32-NEXT:    vmul.vv v24, v8, v2
+; RV32-NEXT:    vmul.vv v25, v8, v29
+; RV32-NEXT:    vmul.vv v26, v8, v30
+; RV32-NEXT:    vmul.vv v27, v8, v31
+; RV32-NEXT:    vmul.vv v28, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v29, v8, v29
+; RV32-NEXT:    vmul.vv v30, v8, v5
+; RV32-NEXT:    vmul.vv v31, v8, v4
+; RV32-NEXT:    vmul.vv v7, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v5, v8, v5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v3, v8, v3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vxor.vv v8, v8, v1
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vxor.vv v8, v8, v11
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v13
+; RV32-NEXT:    vxor.vv v8, v8, v14
+; RV32-NEXT:    vxor.vv v8, v8, v15
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v17
+; RV32-NEXT:    vxor.vv v8, v8, v18
+; RV32-NEXT:    vxor.vv v8, v8, v19
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v21
+; RV32-NEXT:    vxor.vv v8, v8, v22
+; RV32-NEXT:    vxor.vv v8, v8, v23
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v25
+; RV32-NEXT:    vxor.vv v8, v8, v26
+; RV32-NEXT:    vxor.vv v8, v8, v27
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v29
+; RV32-NEXT:    vxor.vv v8, v8, v30
+; RV32-NEXT:    vxor.vv v8, v8, v31
+; RV32-NEXT:    vxor.vv v8, v8, v7
+; RV32-NEXT:    vxor.vv v8, v8, v6
+; RV32-NEXT:    vxor.vv v8, v8, v5
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v3
+; RV32-NEXT:    vxor.vv v8, v8, v2
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vx v9, v8, a6
+; RV32-NEXT:    vsll.vx v10, v8, a6
+; RV32-NEXT:    vsrl.vx v11, v8, ra
+; RV32-NEXT:    vand.vx v12, v8, a4
+; RV32-NEXT:    vand.vx v11, v11, a4
+; RV32-NEXT:    vsrl.vi v13, v8, 24
+; RV32-NEXT:    vand.vx v14, v8, a5
+; RV32-NEXT:    vand.vx v13, v13, a5
+; RV32-NEXT:    vsll.vx v12, v12, ra
+; RV32-NEXT:    vsrl.vi v15, v8, 8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v15, v15, v16
+; RV32-NEXT:    vor.vv v9, v11, v9
+; RV32-NEXT:    vor.vv v11, v15, v13
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vsll.vi v13, v14, 24
+; RV32-NEXT:    vor.vv v8, v13, v8
+; RV32-NEXT:    vor.vv v10, v10, v12
+; RV32-NEXT:    vor.vv v9, v11, v9
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vor.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vi v9, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_v1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -224
+; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li s11, 56
+; RV64-NEXT:    li ra, 40
+; RV64-NEXT:    lui a0, 16
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vsrl.vi v10, v8, 24
+; RV64-NEXT:    vsrl.vi v9, v8, 8
+; RV64-NEXT:    li t2, 255
+; RV64-NEXT:    lui t6, 61681
+; RV64-NEXT:    lui s0, 209715
+; RV64-NEXT:    lui s1, 349525
+; RV64-NEXT:    li s10, 16
+; RV64-NEXT:    li s9, 32
+; RV64-NEXT:    li s8, 64
+; RV64-NEXT:    li s7, 128
+; RV64-NEXT:    li s5, 256
+; RV64-NEXT:    li t5, 512
+; RV64-NEXT:    li t3, 1024
+; RV64-NEXT:    li t0, 1
+; RV64-NEXT:    lui s6, 1
+; RV64-NEXT:    lui s4, 2
+; RV64-NEXT:    lui t4, 4
+; RV64-NEXT:    lui t1, 8
+; RV64-NEXT:    lui a7, 32
+; RV64-NEXT:    lui a6, 64
+; RV64-NEXT:    lui a5, 128
+; RV64-NEXT:    lui a4, 256
+; RV64-NEXT:    lui a3, 512
+; RV64-NEXT:    lui a2, 1024
+; RV64-NEXT:    vsrl.vx v11, v8, s11
+; RV64-NEXT:    vsrl.vx v12, v8, ra
+; RV64-NEXT:    addi t6, t6, -241
+; RV64-NEXT:    addi s2, s0, 819
+; RV64-NEXT:    addi s3, s1, 1365
+; RV64-NEXT:    slli s1, t6, 32
+; RV64-NEXT:    add s1, t6, s1
+; RV64-NEXT:    slli t6, s2, 32
+; RV64-NEXT:    add s2, s2, t6
+; RV64-NEXT:    slli t6, s3, 32
+; RV64-NEXT:    add s3, s3, t6
+; RV64-NEXT:    addi s0, a0, -256
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    lui a0, 4080
+; RV64-NEXT:    vand.vx v10, v10, a0
+; RV64-NEXT:    slli t6, t2, 24
+; RV64-NEXT:    vand.vx v13, v8, a0
+; RV64-NEXT:    vsll.vx v14, v8, s11
+; RV64-NEXT:    vand.vx v12, v12, s0
+; RV64-NEXT:    vand.vx v9, v9, t6
+; RV64-NEXT:    vsll.vi v13, v13, 24
+; RV64-NEXT:    vand.vx v15, v8, t6
+; RV64-NEXT:    vand.vx v8, v8, s0
+; RV64-NEXT:    vor.vv v11, v12, v11
+; RV64-NEXT:    vor.vv v9, v9, v10
+; RV64-NEXT:    vsll.vi v10, v15, 8
+; RV64-NEXT:    vsll.vx v8, v8, ra
+; RV64-NEXT:    vor.vv v9, v9, v11
+; RV64-NEXT:    vor.vv v10, v13, v10
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, s1
+; RV64-NEXT:    vand.vx v9, v9, s1
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vand.vx v9, v9, s2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, s3
+; RV64-NEXT:    vand.vx v9, v9, s3
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vand.vx v9, v8, s10
+; RV64-NEXT:    lui t2, 4096
+; RV64-NEXT:    vand.vx v10, v8, s9
+; RV64-NEXT:    lui s9, 8192
+; RV64-NEXT:    vand.vx v11, v8, s8
+; RV64-NEXT:    lui s8, 16384
+; RV64-NEXT:    vand.vx v12, v8, s7
+; RV64-NEXT:    lui s10, 32768
+; RV64-NEXT:    vand.vx v13, v8, s5
+; RV64-NEXT:    lui s11, 65536
+; RV64-NEXT:    vand.vx v14, v8, t5
+; RV64-NEXT:    lui t5, 131072
+; RV64-NEXT:    vand.vx v15, v8, t3
+; RV64-NEXT:    slli t3, t0, 11
+; RV64-NEXT:    vand.vx v16, v8, t3
+; RV64-NEXT:    lui t3, 262144
+; RV64-NEXT:    vand.vx v17, v8, s6
+; RV64-NEXT:    slli a0, t0, 31
+; RV64-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v18, v8, s4
+; RV64-NEXT:    slli a0, t0, 32
+; RV64-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v19, v8, t4
+; RV64-NEXT:    slli a0, t0, 33
+; RV64-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v20, v8, t1
+; RV64-NEXT:    slli a0, t0, 34
+; RV64-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v21, v8, a1
+; RV64-NEXT:    slli a0, t0, 35
+; RV64-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v22, v8, a7
+; RV64-NEXT:    slli a0, t0, 36
+; RV64-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v23, v8, a6
+; RV64-NEXT:    slli a0, t0, 37
+; RV64-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v24, v8, a5
+; RV64-NEXT:    slli a0, t0, 38
+; RV64-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v25, v8, a4
+; RV64-NEXT:    slli a0, t0, 39
+; RV64-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v26, v8, a3
+; RV64-NEXT:    slli a0, t0, 40
+; RV64-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v27, v8, a2
+; RV64-NEXT:    slli a0, t0, 41
+; RV64-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    lui a0, 2048
+; RV64-NEXT:    vand.vx v28, v8, a0
+; RV64-NEXT:    slli s5, t0, 42
+; RV64-NEXT:    vand.vx v29, v8, t2
+; RV64-NEXT:    slli s6, t0, 43
+; RV64-NEXT:    vand.vx v30, v8, s9
+; RV64-NEXT:    slli s7, t0, 44
+; RV64-NEXT:    vand.vx v31, v8, s8
+; RV64-NEXT:    slli s8, t0, 45
+; RV64-NEXT:    vand.vx v7, v8, s10
+; RV64-NEXT:    slli s9, t0, 46
+; RV64-NEXT:    vand.vx v6, v8, s11
+; RV64-NEXT:    slli s10, t0, 47
+; RV64-NEXT:    vand.vx v5, v8, t5
+; RV64-NEXT:    slli s11, t0, 48
+; RV64-NEXT:    vand.vx v0, v8, t3
+; RV64-NEXT:    slli ra, t0, 49
+; RV64-NEXT:    slli t5, t0, 50
+; RV64-NEXT:    slli t4, t0, 51
+; RV64-NEXT:    slli t3, t0, 52
+; RV64-NEXT:    slli t2, t0, 53
+; RV64-NEXT:    slli t1, t0, 54
+; RV64-NEXT:    slli a7, t0, 55
+; RV64-NEXT:    slli a6, t0, 56
+; RV64-NEXT:    slli a5, t0, 57
+; RV64-NEXT:    slli a4, t0, 58
+; RV64-NEXT:    slli a3, t0, 59
+; RV64-NEXT:    slli a2, t0, 60
+; RV64-NEXT:    slli a1, t0, 61
+; RV64-NEXT:    slli t0, t0, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v4, v8, 2
+; RV64-NEXT:    vand.vi v3, v8, 1
+; RV64-NEXT:    vand.vi v2, v8, 4
+; RV64-NEXT:    vand.vi v1, v8, 8
+; RV64-NEXT:    vmul.vv v4, v8, v4
+; RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 5
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v4, v8, v3
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 5
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v4, v8, v2
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 5
+; RV64-NEXT:    sub t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v4, v8, v1
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v10
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v11
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v12
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v13
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v14
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v15
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v16
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v17
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v18
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v19
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v20
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v21
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 4
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v22
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v23
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 4
+; RV64-NEXT:    sub t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v24
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v25
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v26
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v27
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v28
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v29
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 3
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v30
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v31
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 3
+; RV64-NEXT:    sub t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v7
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v6
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 2
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v5
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v0
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    slli t6, s4, 1
+; RV64-NEXT:    add s4, t6, s4
+; RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    slli s4, s4, 1
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    addi s4, sp, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v3, v8, v9
+; RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v4, v8, v9
+; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v5, v8, v9
+; RV64-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v6, v8, v9
+; RV64-NEXT:    ld s4, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v7, v8, v9
+; RV64-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v31, v8, v9
+; RV64-NEXT:    ld s4, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v30, v8, v9
+; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v29, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s5
+; RV64-NEXT:    vmul.vv v28, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s6
+; RV64-NEXT:    vmul.vv v27, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s7
+; RV64-NEXT:    vmul.vv v26, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s8
+; RV64-NEXT:    vmul.vv v25, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s9
+; RV64-NEXT:    vmul.vv v24, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s10
+; RV64-NEXT:    vmul.vv v23, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s11
+; RV64-NEXT:    vmul.vv v22, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, ra
+; RV64-NEXT:    vmul.vv v21, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t5
+; RV64-NEXT:    vmul.vv v20, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t4
+; RV64-NEXT:    vmul.vv v19, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t3
+; RV64-NEXT:    vmul.vv v18, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t2
+; RV64-NEXT:    vmul.vv v17, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t1
+; RV64-NEXT:    vmul.vv v16, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a7
+; RV64-NEXT:    vmul.vv v15, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a6
+; RV64-NEXT:    vmul.vv v14, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a5
+; RV64-NEXT:    vmul.vv v13, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a4
+; RV64-NEXT:    vmul.vv v12, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a3
+; RV64-NEXT:    vmul.vv v11, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a2
+; RV64-NEXT:    vmul.vv v10, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a1
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    vand.vx v0, v8, t0
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vand.vx v1, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 5
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 5
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 4
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 2
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 1
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    addi a0, sp, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v1, v2
+; RV64-NEXT:    vxor.vv v3, v2, v3
+; RV64-NEXT:    vxor.vv v4, v3, v4
+; RV64-NEXT:    vxor.vv v5, v4, v5
+; RV64-NEXT:    vxor.vv v6, v5, v6
+; RV64-NEXT:    vxor.vv v7, v6, v7
+; RV64-NEXT:    vxor.vv v31, v7, v31
+; RV64-NEXT:    vxor.vv v30, v31, v30
+; RV64-NEXT:    vxor.vv v29, v30, v29
+; RV64-NEXT:    vxor.vv v28, v29, v28
+; RV64-NEXT:    vxor.vv v27, v28, v27
+; RV64-NEXT:    vxor.vv v26, v27, v26
+; RV64-NEXT:    vxor.vv v25, v26, v25
+; RV64-NEXT:    vxor.vv v24, v25, v24
+; RV64-NEXT:    vxor.vv v23, v24, v23
+; RV64-NEXT:    vxor.vv v22, v23, v22
+; RV64-NEXT:    vxor.vv v21, v22, v21
+; RV64-NEXT:    vxor.vv v20, v21, v20
+; RV64-NEXT:    vxor.vv v19, v20, v19
+; RV64-NEXT:    vxor.vv v18, v19, v18
+; RV64-NEXT:    vxor.vv v17, v18, v17
+; RV64-NEXT:    vxor.vv v16, v17, v16
+; RV64-NEXT:    vxor.vv v15, v16, v15
+; RV64-NEXT:    vxor.vv v14, v15, v14
+; RV64-NEXT:    vxor.vv v13, v14, v13
+; RV64-NEXT:    vxor.vv v12, v13, v12
+; RV64-NEXT:    vxor.vv v11, v12, v11
+; RV64-NEXT:    vxor.vv v10, v11, v10
+; RV64-NEXT:    vxor.vv v9, v10, v9
+; RV64-NEXT:    vxor.vv v9, v9, v0
+; RV64-NEXT:    vxor.vv v8, v9, v8
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v9, v8, a0
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    vsrl.vx v10, v8, a1
+; RV64-NEXT:    vsrl.vi v11, v8, 24
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    vand.vx v10, v10, s0
+; RV64-NEXT:    vor.vv v9, v10, v9
+; RV64-NEXT:    vand.vx v10, v8, t6
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v11, v11, a2
+; RV64-NEXT:    vand.vx v12, v12, t6
+; RV64-NEXT:    vor.vv v11, v12, v11
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    vsll.vi v10, v10, 8
+; RV64-NEXT:    vsll.vi v12, v12, 24
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vsll.vx v12, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, s0
+; RV64-NEXT:    vsll.vx v8, v8, a1
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vor.vv v9, v11, v9
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, s1
+; RV64-NEXT:    vand.vx v9, v9, s1
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vand.vx v9, v9, s2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, s3
+; RV64-NEXT:    vand.vx v9, v9, s3
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 224
+; RV64-NEXT:    ret
+  %a = call <1 x i64> @llvm.clmulr.v1i64(<1 x i64> %x, <1 x i64> %y)
+  ret <1 x i64> %a
+}
+
+define <2 x i64> @clmulr_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s7, 1044480
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    li s11, 1
+; RV32-NEXT:    li s8, 2
+; RV32-NEXT:    li s9, 4
+; RV32-NEXT:    li s10, 8
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    li a5, 64
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    li ra, 256
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    li a1, 1024
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    lui t0, 2
+; RV32-NEXT:    lui t1, 4
+; RV32-NEXT:    lui t2, 8
+; RV32-NEXT:    lui t3, 16
+; RV32-NEXT:    lui t4, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t6, 128
+; RV32-NEXT:    lui s0, 256
+; RV32-NEXT:    lui s1, 512
+; RV32-NEXT:    lui s2, 1024
+; RV32-NEXT:    lui s3, 2048
+; RV32-NEXT:    lui s4, 4096
+; RV32-NEXT:    lui s5, 8192
+; RV32-NEXT:    lui s6, 16384
+; RV32-NEXT:    sw s7, 272(sp)
+; RV32-NEXT:    lui s7, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw a7, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw s11, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw s8, 252(sp)
+; RV32-NEXT:    lui s8, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s9, 244(sp)
+; RV32-NEXT:    lui s9, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s10, 236(sp)
+; RV32-NEXT:    lui s10, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw a3, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw a4, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw a5, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw a6, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw ra, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw a0, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a1, 180(sp)
+; RV32-NEXT:    slli s11, s11, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw s11, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a2, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw t0, 156(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw t1, 148(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw t2, 140(sp)
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t3, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t4, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t5, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t6, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s0, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s1, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s2, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s3, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s4, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s5, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s6, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s7, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw s8, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw s9, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw s10, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw a7, 12(sp)
+; RV32-NEXT:    lui a0, 61681
+; RV32-NEXT:    addi a0, a0, -241
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v3, a0
+; RV32-NEXT:    lui a0, 209715
+; RV32-NEXT:    addi a0, a0, 819
+; RV32-NEXT:    vmv.v.x v2, a0
+; RV32-NEXT:    lui a0, 349525
+; RV32-NEXT:    addi a0, a0, 1365
+; RV32-NEXT:    vmv.v.x v1, a0
+; RV32-NEXT:    addi a0, sp, 272
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v0, (a0), zero
+; RV32-NEXT:    addi a0, sp, 264
+; RV32-NEXT:    vlse64.v v13, (a0), zero
+; RV32-NEXT:    addi a0, sp, 256
+; RV32-NEXT:    vlse64.v v14, (a0), zero
+; RV32-NEXT:    addi a0, sp, 248
+; RV32-NEXT:    vlse64.v v15, (a0), zero
+; RV32-NEXT:    addi a0, sp, 240
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v17, (a0), zero
+; RV32-NEXT:    addi a0, sp, 224
+; RV32-NEXT:    vlse64.v v18, (a0), zero
+; RV32-NEXT:    addi a0, sp, 216
+; RV32-NEXT:    vlse64.v v19, (a0), zero
+; RV32-NEXT:    addi a0, sp, 208
+; RV32-NEXT:    vlse64.v v20, (a0), zero
+; RV32-NEXT:    addi a0, sp, 200
+; RV32-NEXT:    vlse64.v v21, (a0), zero
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vlse64.v v22, (a0), zero
+; RV32-NEXT:    addi a0, sp, 184
+; RV32-NEXT:    vlse64.v v23, (a0), zero
+; RV32-NEXT:    addi a0, sp, 176
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    addi a0, sp, 168
+; RV32-NEXT:    vlse64.v v25, (a0), zero
+; RV32-NEXT:    addi a0, sp, 160
+; RV32-NEXT:    vlse64.v v26, (a0), zero
+; RV32-NEXT:    addi a0, sp, 152
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    addi a0, sp, 144
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    addi a0, sp, 136
+; RV32-NEXT:    vlse64.v v29, (a0), zero
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    vlse64.v v30, (a0), zero
+; RV32-NEXT:    addi a0, sp, 120
+; RV32-NEXT:    vlse64.v v31, (a0), zero
+; RV32-NEXT:    addi a0, sp, 112
+; RV32-NEXT:    vlse64.v v11, (a0), zero
+; RV32-NEXT:    addi a0, sp, 104
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    vlse64.v v5, (a0), zero
+; RV32-NEXT:    addi a0, sp, 88
+; RV32-NEXT:    vlse64.v v4, (a0), zero
+; RV32-NEXT:    li a6, 56
+; RV32-NEXT:    vsrl.vi v27, v8, 24
+; RV32-NEXT:    vsrl.vx v28, v8, a6
+; RV32-NEXT:    li ra, 40
+; RV32-NEXT:    vsrl.vx v7, v8, ra
+; RV32-NEXT:    vsll.vx v6, v8, a6
+; RV32-NEXT:    addi a4, t3, -256
+; RV32-NEXT:    vand.vx v7, v7, a4
+; RV32-NEXT:    vor.vv v28, v7, v28
+; RV32-NEXT:    vand.vx v7, v8, a4
+; RV32-NEXT:    vsll.vx v7, v7, ra
+; RV32-NEXT:    vor.vv v7, v6, v7
+; RV32-NEXT:    vsrl.vi v6, v8, 8
+; RV32-NEXT:    lui a5, 4080
+; RV32-NEXT:    vand.vx v27, v27, a5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v6, v6, v0
+; RV32-NEXT:    vor.vv v27, v6, v27
+; RV32-NEXT:    addi a3, sp, 80
+; RV32-NEXT:    vlse64.v v6, (a3), zero
+; RV32-NEXT:    vor.vv v27, v27, v28
+; RV32-NEXT:    vand.vx v28, v8, a5
+; RV32-NEXT:    vsll.vi v28, v28, 24
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v28, v8
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    vlse64.v v28, (a3), zero
+; RV32-NEXT:    vor.vv v8, v7, v8
+; RV32-NEXT:    addi a3, sp, 64
+; RV32-NEXT:    vlse64.v v7, (a3), zero
+; RV32-NEXT:    vor.vv v8, v8, v27
+; RV32-NEXT:    vsrl.vi v27, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v3
+; RV32-NEXT:    vand.vv v27, v27, v3
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    vsrl.vi v27, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v2
+; RV32-NEXT:    vand.vv v27, v27, v2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    vsrl.vi v27, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v1
+; RV32-NEXT:    vand.vv v27, v27, v1
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v27, v8
+; RV32-NEXT:    addi a3, sp, 56
+; RV32-NEXT:    vlse64.v v27, (a3), zero
+; RV32-NEXT:    vand.vv v13, v8, v13
+; RV32-NEXT:    vand.vv v14, v8, v14
+; RV32-NEXT:    vand.vv v15, v8, v15
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    vand.vv v17, v8, v17
+; RV32-NEXT:    vand.vv v18, v8, v18
+; RV32-NEXT:    vand.vv v19, v8, v19
+; RV32-NEXT:    vand.vv v20, v8, v20
+; RV32-NEXT:    vand.vv v21, v8, v21
+; RV32-NEXT:    vand.vv v22, v8, v22
+; RV32-NEXT:    vand.vv v23, v8, v23
+; RV32-NEXT:    vand.vv v24, v8, v24
+; RV32-NEXT:    vand.vv v25, v8, v25
+; RV32-NEXT:    vand.vv v26, v8, v26
+; RV32-NEXT:    vand.vv v3, v8, v9
+; RV32-NEXT:    vand.vv v2, v8, v10
+; RV32-NEXT:    vand.vv v29, v8, v29
+; RV32-NEXT:    vand.vv v30, v8, v30
+; RV32-NEXT:    vand.vv v31, v8, v31
+; RV32-NEXT:    vand.vv v0, v8, v11
+; RV32-NEXT:    vand.vv v9, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v5, v8, v5
+; RV32-NEXT:    vand.vv v4, v8, v4
+; RV32-NEXT:    vand.vv v6, v8, v6
+; RV32-NEXT:    vand.vv v9, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi a3, sp, 48
+; RV32-NEXT:    addi a0, sp, 40
+; RV32-NEXT:    vlse64.v v9, (a3), zero
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vand.vv v11, v8, v7
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v11, v8, v27
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    addi a2, sp, 32
+; RV32-NEXT:    addi a3, sp, 24
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a2), zero
+; RV32-NEXT:    vlse64.v v10, (a3), zero
+; RV32-NEXT:    vlse64.v v11, (a1), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vand.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v11
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vv v9, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vi v9, v8, 8
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s11
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t3
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t5
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, t6
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s0
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s1
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s2
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s3
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s4
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s5
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s6
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s7
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s8
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v9, v8, s9
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vand.vx v1, v8, s10
+; RV32-NEXT:    vmul.vv v1, v8, v1
+; RV32-NEXT:    vmul.vv v9, v8, v13
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    vmul.vv v10, v8, v14
+; RV32-NEXT:    vmul.vv v11, v8, v15
+; RV32-NEXT:    vmul.vv v12, v8, v16
+; RV32-NEXT:    vmul.vv v13, v8, v17
+; RV32-NEXT:    vmul.vv v14, v8, v18
+; RV32-NEXT:    vmul.vv v15, v8, v19
+; RV32-NEXT:    vmul.vv v16, v8, v20
+; RV32-NEXT:    vmul.vv v17, v8, v21
+; RV32-NEXT:    vmul.vv v18, v8, v22
+; RV32-NEXT:    vmul.vv v19, v8, v23
+; RV32-NEXT:    vmul.vv v20, v8, v24
+; RV32-NEXT:    vmul.vv v21, v8, v25
+; RV32-NEXT:    vmul.vv v22, v8, v26
+; RV32-NEXT:    vmul.vv v23, v8, v3
+; RV32-NEXT:    vmul.vv v24, v8, v2
+; RV32-NEXT:    vmul.vv v25, v8, v29
+; RV32-NEXT:    vmul.vv v26, v8, v30
+; RV32-NEXT:    vmul.vv v27, v8, v31
+; RV32-NEXT:    vmul.vv v28, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v29, v8, v29
+; RV32-NEXT:    vmul.vv v30, v8, v5
+; RV32-NEXT:    vmul.vv v31, v8, v4
+; RV32-NEXT:    vmul.vv v7, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v5, v8, v5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v3, v8, v3
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 5
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v9, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 4
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 3
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 2
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vxor.vv v8, v8, v1
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vxor.vv v8, v8, v11
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v13
+; RV32-NEXT:    vxor.vv v8, v8, v14
+; RV32-NEXT:    vxor.vv v8, v8, v15
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v17
+; RV32-NEXT:    vxor.vv v8, v8, v18
+; RV32-NEXT:    vxor.vv v8, v8, v19
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v21
+; RV32-NEXT:    vxor.vv v8, v8, v22
+; RV32-NEXT:    vxor.vv v8, v8, v23
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v25
+; RV32-NEXT:    vxor.vv v8, v8, v26
+; RV32-NEXT:    vxor.vv v8, v8, v27
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v29
+; RV32-NEXT:    vxor.vv v8, v8, v30
+; RV32-NEXT:    vxor.vv v8, v8, v31
+; RV32-NEXT:    vxor.vv v8, v8, v7
+; RV32-NEXT:    vxor.vv v8, v8, v6
+; RV32-NEXT:    vxor.vv v8, v8, v5
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v3
+; RV32-NEXT:    vxor.vv v8, v8, v2
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vx v9, v8, a6
+; RV32-NEXT:    vsll.vx v10, v8, a6
+; RV32-NEXT:    vsrl.vx v11, v8, ra
+; RV32-NEXT:    vand.vx v12, v8, a4
+; RV32-NEXT:    vand.vx v11, v11, a4
+; RV32-NEXT:    vsrl.vi v13, v8, 24
+; RV32-NEXT:    vand.vx v14, v8, a5
+; RV32-NEXT:    vand.vx v13, v13, a5
+; RV32-NEXT:    vsll.vx v12, v12, ra
+; RV32-NEXT:    vsrl.vi v15, v8, 8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v15, v15, v16
+; RV32-NEXT:    vor.vv v9, v11, v9
+; RV32-NEXT:    vor.vv v11, v15, v13
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vsll.vi v13, v14, 24
+; RV32-NEXT:    vor.vv v8, v13, v8
+; RV32-NEXT:    vor.vv v10, v10, v12
+; RV32-NEXT:    vor.vv v9, v11, v9
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vor.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vi v9, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v9, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -224
+; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li s11, 56
+; RV64-NEXT:    li ra, 40
+; RV64-NEXT:    lui a0, 16
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vsrl.vi v10, v8, 24
+; RV64-NEXT:    vsrl.vi v9, v8, 8
+; RV64-NEXT:    li t2, 255
+; RV64-NEXT:    lui t6, 61681
+; RV64-NEXT:    lui s0, 209715
+; RV64-NEXT:    lui s1, 349525
+; RV64-NEXT:    li s10, 16
+; RV64-NEXT:    li s9, 32
+; RV64-NEXT:    li s8, 64
+; RV64-NEXT:    li s7, 128
+; RV64-NEXT:    li s5, 256
+; RV64-NEXT:    li t5, 512
+; RV64-NEXT:    li t3, 1024
+; RV64-NEXT:    li t0, 1
+; RV64-NEXT:    lui s6, 1
+; RV64-NEXT:    lui s4, 2
+; RV64-NEXT:    lui t4, 4
+; RV64-NEXT:    lui t1, 8
+; RV64-NEXT:    lui a7, 32
+; RV64-NEXT:    lui a6, 64
+; RV64-NEXT:    lui a5, 128
+; RV64-NEXT:    lui a4, 256
+; RV64-NEXT:    lui a3, 512
+; RV64-NEXT:    lui a2, 1024
+; RV64-NEXT:    vsrl.vx v11, v8, s11
+; RV64-NEXT:    vsrl.vx v12, v8, ra
+; RV64-NEXT:    addi t6, t6, -241
+; RV64-NEXT:    addi s2, s0, 819
+; RV64-NEXT:    addi s3, s1, 1365
+; RV64-NEXT:    slli s1, t6, 32
+; RV64-NEXT:    add s1, t6, s1
+; RV64-NEXT:    slli t6, s2, 32
+; RV64-NEXT:    add s2, s2, t6
+; RV64-NEXT:    slli t6, s3, 32
+; RV64-NEXT:    add s3, s3, t6
+; RV64-NEXT:    addi s0, a0, -256
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    lui a0, 4080
+; RV64-NEXT:    vand.vx v10, v10, a0
+; RV64-NEXT:    slli t6, t2, 24
+; RV64-NEXT:    vand.vx v13, v8, a0
+; RV64-NEXT:    vsll.vx v14, v8, s11
+; RV64-NEXT:    vand.vx v12, v12, s0
+; RV64-NEXT:    vand.vx v9, v9, t6
+; RV64-NEXT:    vsll.vi v13, v13, 24
+; RV64-NEXT:    vand.vx v15, v8, t6
+; RV64-NEXT:    vand.vx v8, v8, s0
+; RV64-NEXT:    vor.vv v11, v12, v11
+; RV64-NEXT:    vor.vv v9, v9, v10
+; RV64-NEXT:    vsll.vi v10, v15, 8
+; RV64-NEXT:    vsll.vx v8, v8, ra
+; RV64-NEXT:    vor.vv v9, v9, v11
+; RV64-NEXT:    vor.vv v10, v13, v10
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, s1
+; RV64-NEXT:    vand.vx v9, v9, s1
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vand.vx v9, v9, s2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, s3
+; RV64-NEXT:    vand.vx v9, v9, s3
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vand.vx v9, v8, s10
+; RV64-NEXT:    lui t2, 4096
+; RV64-NEXT:    vand.vx v10, v8, s9
+; RV64-NEXT:    lui s9, 8192
+; RV64-NEXT:    vand.vx v11, v8, s8
+; RV64-NEXT:    lui s8, 16384
+; RV64-NEXT:    vand.vx v12, v8, s7
+; RV64-NEXT:    lui s10, 32768
+; RV64-NEXT:    vand.vx v13, v8, s5
+; RV64-NEXT:    lui s11, 65536
+; RV64-NEXT:    vand.vx v14, v8, t5
+; RV64-NEXT:    lui t5, 131072
+; RV64-NEXT:    vand.vx v15, v8, t3
+; RV64-NEXT:    slli t3, t0, 11
+; RV64-NEXT:    vand.vx v16, v8, t3
+; RV64-NEXT:    lui t3, 262144
+; RV64-NEXT:    vand.vx v17, v8, s6
+; RV64-NEXT:    slli a0, t0, 31
+; RV64-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v18, v8, s4
+; RV64-NEXT:    slli a0, t0, 32
+; RV64-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v19, v8, t4
+; RV64-NEXT:    slli a0, t0, 33
+; RV64-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v20, v8, t1
+; RV64-NEXT:    slli a0, t0, 34
+; RV64-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v21, v8, a1
+; RV64-NEXT:    slli a0, t0, 35
+; RV64-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v22, v8, a7
+; RV64-NEXT:    slli a0, t0, 36
+; RV64-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v23, v8, a6
+; RV64-NEXT:    slli a0, t0, 37
+; RV64-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v24, v8, a5
+; RV64-NEXT:    slli a0, t0, 38
+; RV64-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v25, v8, a4
+; RV64-NEXT:    slli a0, t0, 39
+; RV64-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v26, v8, a3
+; RV64-NEXT:    slli a0, t0, 40
+; RV64-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v27, v8, a2
+; RV64-NEXT:    slli a0, t0, 41
+; RV64-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    lui a0, 2048
+; RV64-NEXT:    vand.vx v28, v8, a0
+; RV64-NEXT:    slli s5, t0, 42
+; RV64-NEXT:    vand.vx v29, v8, t2
+; RV64-NEXT:    slli s6, t0, 43
+; RV64-NEXT:    vand.vx v30, v8, s9
+; RV64-NEXT:    slli s7, t0, 44
+; RV64-NEXT:    vand.vx v31, v8, s8
+; RV64-NEXT:    slli s8, t0, 45
+; RV64-NEXT:    vand.vx v7, v8, s10
+; RV64-NEXT:    slli s9, t0, 46
+; RV64-NEXT:    vand.vx v6, v8, s11
+; RV64-NEXT:    slli s10, t0, 47
+; RV64-NEXT:    vand.vx v5, v8, t5
+; RV64-NEXT:    slli s11, t0, 48
+; RV64-NEXT:    vand.vx v0, v8, t3
+; RV64-NEXT:    slli ra, t0, 49
+; RV64-NEXT:    slli t5, t0, 50
+; RV64-NEXT:    slli t4, t0, 51
+; RV64-NEXT:    slli t3, t0, 52
+; RV64-NEXT:    slli t2, t0, 53
+; RV64-NEXT:    slli t1, t0, 54
+; RV64-NEXT:    slli a7, t0, 55
+; RV64-NEXT:    slli a6, t0, 56
+; RV64-NEXT:    slli a5, t0, 57
+; RV64-NEXT:    slli a4, t0, 58
+; RV64-NEXT:    slli a3, t0, 59
+; RV64-NEXT:    slli a2, t0, 60
+; RV64-NEXT:    slli a1, t0, 61
+; RV64-NEXT:    slli t0, t0, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v4, v8, 2
+; RV64-NEXT:    vand.vi v3, v8, 1
+; RV64-NEXT:    vand.vi v2, v8, 4
+; RV64-NEXT:    vand.vi v1, v8, 8
+; RV64-NEXT:    vmul.vv v4, v8, v4
+; RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 5
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v4, v8, v3
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 5
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v4, v8, v2
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 5
+; RV64-NEXT:    sub t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v4, v8, v1
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v10
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v11
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v12
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v13
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v14
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v15
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v16
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v17
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v18
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v19
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v20
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v21
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 4
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v22
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v23
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 4
+; RV64-NEXT:    sub t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v24
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v25
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v26
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v27
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add s4, s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v28
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v29
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 3
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v30
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v31
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 3
+; RV64-NEXT:    sub t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v7
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    mv s4, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s4
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v6
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli s4, t6, 2
+; RV64-NEXT:    add t6, s4, t6
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v5
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 2
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 112
+; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    vmul.vv v9, v8, v0
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    slli t6, s4, 1
+; RV64-NEXT:    add s4, t6, s4
+; RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    slli s4, s4, 1
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    csrr s4, vlenb
+; RV64-NEXT:    add s4, sp, s4
+; RV64-NEXT:    addi s4, s4, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    addi s4, sp, 112
+; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
+; RV64-NEXT:    ld s4, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v3, v8, v9
+; RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v4, v8, v9
+; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v5, v8, v9
+; RV64-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v6, v8, v9
+; RV64-NEXT:    ld s4, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v7, v8, v9
+; RV64-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v31, v8, v9
+; RV64-NEXT:    ld s4, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v30, v8, v9
+; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v9, v8, s4
+; RV64-NEXT:    vmul.vv v29, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s5
+; RV64-NEXT:    vmul.vv v28, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s6
+; RV64-NEXT:    vmul.vv v27, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s7
+; RV64-NEXT:    vmul.vv v26, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s8
+; RV64-NEXT:    vmul.vv v25, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s9
+; RV64-NEXT:    vmul.vv v24, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s10
+; RV64-NEXT:    vmul.vv v23, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, s11
+; RV64-NEXT:    vmul.vv v22, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, ra
+; RV64-NEXT:    vmul.vv v21, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t5
+; RV64-NEXT:    vmul.vv v20, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t4
+; RV64-NEXT:    vmul.vv v19, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t3
+; RV64-NEXT:    vmul.vv v18, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t2
+; RV64-NEXT:    vmul.vv v17, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, t1
+; RV64-NEXT:    vmul.vv v16, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a7
+; RV64-NEXT:    vmul.vv v15, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a6
+; RV64-NEXT:    vmul.vv v14, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a5
+; RV64-NEXT:    vmul.vv v13, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a4
+; RV64-NEXT:    vmul.vv v12, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a3
+; RV64-NEXT:    vmul.vv v11, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a2
+; RV64-NEXT:    vmul.vv v10, v8, v9
+; RV64-NEXT:    vand.vx v9, v8, a1
+; RV64-NEXT:    vmul.vv v9, v8, v9
+; RV64-NEXT:    vand.vx v0, v8, t0
+; RV64-NEXT:    vmul.vv v0, v8, v0
+; RV64-NEXT:    vand.vx v1, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 5
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v2, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 5
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 4
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 2
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 1
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v1, v1, v2
+; RV64-NEXT:    addi a0, sp, 112
+; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v1, v2
+; RV64-NEXT:    vxor.vv v3, v2, v3
+; RV64-NEXT:    vxor.vv v4, v3, v4
+; RV64-NEXT:    vxor.vv v5, v4, v5
+; RV64-NEXT:    vxor.vv v6, v5, v6
+; RV64-NEXT:    vxor.vv v7, v6, v7
+; RV64-NEXT:    vxor.vv v31, v7, v31
+; RV64-NEXT:    vxor.vv v30, v31, v30
+; RV64-NEXT:    vxor.vv v29, v30, v29
+; RV64-NEXT:    vxor.vv v28, v29, v28
+; RV64-NEXT:    vxor.vv v27, v28, v27
+; RV64-NEXT:    vxor.vv v26, v27, v26
+; RV64-NEXT:    vxor.vv v25, v26, v25
+; RV64-NEXT:    vxor.vv v24, v25, v24
+; RV64-NEXT:    vxor.vv v23, v24, v23
+; RV64-NEXT:    vxor.vv v22, v23, v22
+; RV64-NEXT:    vxor.vv v21, v22, v21
+; RV64-NEXT:    vxor.vv v20, v21, v20
+; RV64-NEXT:    vxor.vv v19, v20, v19
+; RV64-NEXT:    vxor.vv v18, v19, v18
+; RV64-NEXT:    vxor.vv v17, v18, v17
+; RV64-NEXT:    vxor.vv v16, v17, v16
+; RV64-NEXT:    vxor.vv v15, v16, v15
+; RV64-NEXT:    vxor.vv v14, v15, v14
+; RV64-NEXT:    vxor.vv v13, v14, v13
+; RV64-NEXT:    vxor.vv v12, v13, v12
+; RV64-NEXT:    vxor.vv v11, v12, v11
+; RV64-NEXT:    vxor.vv v10, v11, v10
+; RV64-NEXT:    vxor.vv v9, v10, v9
+; RV64-NEXT:    vxor.vv v9, v9, v0
+; RV64-NEXT:    vxor.vv v8, v9, v8
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v9, v8, a0
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    vsrl.vx v10, v8, a1
+; RV64-NEXT:    vsrl.vi v11, v8, 24
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    vand.vx v10, v10, s0
+; RV64-NEXT:    vor.vv v9, v10, v9
+; RV64-NEXT:    vand.vx v10, v8, t6
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v11, v11, a2
+; RV64-NEXT:    vand.vx v12, v12, t6
+; RV64-NEXT:    vor.vv v11, v12, v11
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    vsll.vi v10, v10, 8
+; RV64-NEXT:    vsll.vi v12, v12, 24
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vsll.vx v12, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, s0
+; RV64-NEXT:    vsll.vx v8, v8, a1
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vor.vv v9, v11, v9
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, s1
+; RV64-NEXT:    vand.vx v9, v9, s1
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, s2
+; RV64-NEXT:    vand.vx v9, v9, s2
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, s3
+; RV64-NEXT:    vand.vx v9, v9, s3
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v9, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 224
+; RV64-NEXT:    ret
+  %a = call <2 x i64> @llvm.clmulr.v2i64(<2 x i64> %x, <2 x i64> %y)
+  ret <2 x i64> %a
+}
+
+define <4 x i64> @clmulr_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s7, 1044480
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    li s8, 2
+; RV32-NEXT:    li s9, 4
+; RV32-NEXT:    li s10, 8
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    li a5, 64
+; RV32-NEXT:    li a6, 128
+; RV32-NEXT:    li s11, 256
+; RV32-NEXT:    li ra, 512
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    lui t0, 2
+; RV32-NEXT:    lui t1, 4
+; RV32-NEXT:    lui t2, 8
+; RV32-NEXT:    lui t3, 16
+; RV32-NEXT:    lui t4, 32
+; RV32-NEXT:    lui t5, 64
+; RV32-NEXT:    lui t6, 128
+; RV32-NEXT:    lui s0, 256
+; RV32-NEXT:    lui s1, 512
+; RV32-NEXT:    lui s2, 1024
+; RV32-NEXT:    lui s3, 2048
+; RV32-NEXT:    lui s4, 4096
+; RV32-NEXT:    lui s5, 8192
+; RV32-NEXT:    lui s6, 16384
+; RV32-NEXT:    sw s7, 272(sp)
+; RV32-NEXT:    lui s7, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw a7, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a1, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw s8, 252(sp)
+; RV32-NEXT:    lui s8, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw s9, 244(sp)
+; RV32-NEXT:    lui s9, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw s10, 236(sp)
+; RV32-NEXT:    lui s10, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw a3, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw a4, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw a5, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw a6, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw s11, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw ra, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a0, 180(sp)
+; RV32-NEXT:    slli a5, a1, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw a5, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a2, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw t0, 156(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw t1, 148(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw t2, 140(sp)
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw t3, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw t4, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw t5, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw t6, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s0, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s1, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s2, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s3, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s4, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s5, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s6, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s7, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw s8, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw s9, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw s10, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw a7, 12(sp)
+; RV32-NEXT:    lui a0, 61681
+; RV32-NEXT:    addi a0, a0, -241
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v4, a0
+; RV32-NEXT:    lui a0, 209715
+; RV32-NEXT:    addi a0, a0, 819
+; RV32-NEXT:    vmv.v.x v2, a0
+; RV32-NEXT:    lui a0, 349525
+; RV32-NEXT:    addi a0, a0, 1365
+; RV32-NEXT:    vmv.v.x v0, a0
+; RV32-NEXT:    addi a0, sp, 272
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v6, (a0), zero
+; RV32-NEXT:    addi a0, sp, 264
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 256
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    addi a0, sp, 248
+; RV32-NEXT:    vlse64.v v14, (a0), zero
+; RV32-NEXT:    addi a0, sp, 240
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v18, (a0), zero
+; RV32-NEXT:    addi a0, sp, 224
+; RV32-NEXT:    vlse64.v v20, (a0), zero
+; RV32-NEXT:    addi a0, sp, 216
+; RV32-NEXT:    vlse64.v v22, (a0), zero
+; RV32-NEXT:    li ra, 56
+; RV32-NEXT:    vsrl.vi v24, v8, 24
+; RV32-NEXT:    vsrl.vx v26, v8, ra
+; RV32-NEXT:    li s11, 40
+; RV32-NEXT:    vsrl.vx v28, v8, s11
+; RV32-NEXT:    vsll.vx v30, v8, ra
+; RV32-NEXT:    addi a4, t3, -256
+; RV32-NEXT:    vand.vx v28, v28, a4
+; RV32-NEXT:    vor.vv v26, v28, v26
+; RV32-NEXT:    vand.vx v28, v8, a4
+; RV32-NEXT:    vsll.vx v28, v28, s11
+; RV32-NEXT:    vor.vv v30, v30, v28
+; RV32-NEXT:    vsrl.vi v28, v8, 8
+; RV32-NEXT:    lui a6, 4080
+; RV32-NEXT:    vand.vx v24, v24, a6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v28, v28, v6
+; RV32-NEXT:    vor.vv v28, v28, v24
+; RV32-NEXT:    addi a3, sp, 208
+; RV32-NEXT:    vlse64.v v24, (a3), zero
+; RV32-NEXT:    vor.vv v10, v28, v26
+; RV32-NEXT:    vand.vx v26, v8, a6
+; RV32-NEXT:    vsll.vi v26, v26, 24
+; RV32-NEXT:    vand.vv v8, v8, v6
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v26, v8
+; RV32-NEXT:    addi a3, sp, 200
+; RV32-NEXT:    vlse64.v v28, (a3), zero
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    addi a3, sp, 192
+; RV32-NEXT:    vlse64.v v26, (a3), zero
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v30, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v4
+; RV32-NEXT:    vand.vv v30, v30, v4
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    vsrl.vi v30, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v2, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v2
+; RV32-NEXT:    vand.vv v30, v30, v2
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    vsrl.vi v30, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v0, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v30, v30, v0
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v30, v8
+; RV32-NEXT:    addi a3, sp, 184
+; RV32-NEXT:    vlse64.v v30, (a3), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v6, v8, v10
+; RV32-NEXT:    vand.vv v4, v8, v12
+; RV32-NEXT:    vand.vv v2, v8, v14
+; RV32-NEXT:    vand.vv v0, v8, v16
+; RV32-NEXT:    vand.vv v10, v8, v18
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v22
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v28, v8, v28
+; RV32-NEXT:    addi a3, sp, 176
+; RV32-NEXT:    addi a0, sp, 168
+; RV32-NEXT:    vlse64.v v10, (a3), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vand.vv v14, v8, v26
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v14, v8, v30
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a2, sp, 160
+; RV32-NEXT:    addi a3, sp, 152
+; RV32-NEXT:    addi a1, sp, 144
+; RV32-NEXT:    addi a0, sp, 136
+; RV32-NEXT:    vlse64.v v10, (a2), zero
+; RV32-NEXT:    vlse64.v v12, (a3), zero
+; RV32-NEXT:    vlse64.v v14, (a1), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    addi a1, sp, 120
+; RV32-NEXT:    addi a2, sp, 112
+; RV32-NEXT:    addi a3, sp, 104
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    addi a1, sp, 88
+; RV32-NEXT:    addi a2, sp, 80
+; RV32-NEXT:    addi a3, sp, 72
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    addi a1, sp, 56
+; RV32-NEXT:    addi a2, sp, 48
+; RV32-NEXT:    addi a3, sp, 40
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    addi a3, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    vlse64.v v14, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    vand.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v14
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vv v10, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 2
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 1
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 4
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vi v10, v8, 8
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    li a0, 1024
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, a5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t1
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t2
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t3
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t4
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, t6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s0
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s1
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s2
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s3
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s4
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s5
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s6
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s7
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s8
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s9
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vand.vx v10, v8, s10
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    vmul.vv v12, v8, v6
+; RV32-NEXT:    vmul.vv v14, v8, v4
+; RV32-NEXT:    vmul.vv v16, v8, v2
+; RV32-NEXT:    vmul.vv v18, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v20, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v20, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v22, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v22, v8, v22
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v24, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v26, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v26, v8, v26
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v30, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v30, v8, v30
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v6, v8, v6
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v2, v8, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v10, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v14
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v18
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v22
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v26
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v30
+; RV32-NEXT:    vxor.vv v8, v8, v6
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v2
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vx v10, v8, ra
+; RV32-NEXT:    vsll.vx v12, v8, ra
+; RV32-NEXT:    vsrl.vx v14, v8, s11
+; RV32-NEXT:    vand.vx v16, v8, a4
+; RV32-NEXT:    vand.vx v14, v14, a4
+; RV32-NEXT:    vsrl.vi v18, v8, 24
+; RV32-NEXT:    vand.vx v20, v8, a6
+; RV32-NEXT:    vand.vx v18, v18, a6
+; RV32-NEXT:    vsll.vx v16, v16, s11
+; RV32-NEXT:    vsrl.vi v22, v8, 8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v22, v22, v24
+; RV32-NEXT:    vor.vv v10, v14, v10
+; RV32-NEXT:    vor.vv v14, v22, v18
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vsll.vi v18, v20, 24
+; RV32-NEXT:    vor.vv v8, v18, v8
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vor.vv v10, v14, v10
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -224
+; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li a7, 56
+; RV64-NEXT:    li s1, 40
+; RV64-NEXT:    lui s3, 16
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vsrl.vi v14, v8, 24
+; RV64-NEXT:    lui t6, 4080
+; RV64-NEXT:    vsrl.vi v10, v8, 8
+; RV64-NEXT:    li s2, 255
+; RV64-NEXT:    lui a5, 61681
+; RV64-NEXT:    lui a6, 209715
+; RV64-NEXT:    lui t5, 349525
+; RV64-NEXT:    li t4, 16
+; RV64-NEXT:    li t3, 32
+; RV64-NEXT:    li t2, 64
+; RV64-NEXT:    li t1, 128
+; RV64-NEXT:    li t0, 256
+; RV64-NEXT:    li a4, 512
+; RV64-NEXT:    li a3, 1024
+; RV64-NEXT:    li s0, 1
+; RV64-NEXT:    lui a2, 1
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    lui a0, 4
+; RV64-NEXT:    vsrl.vx v12, v8, a7
+; RV64-NEXT:    vsrl.vx v18, v8, s1
+; RV64-NEXT:    addi s4, s3, -256
+; RV64-NEXT:    vand.vx v16, v14, t6
+; RV64-NEXT:    slli s2, s2, 24
+; RV64-NEXT:    vand.vx v20, v8, t6
+; RV64-NEXT:    vsll.vx v14, v8, a7
+; RV64-NEXT:    addi a7, a5, -241
+; RV64-NEXT:    addi a6, a6, 819
+; RV64-NEXT:    addi a5, t5, 1365
+; RV64-NEXT:    slli t5, s0, 11
+; RV64-NEXT:    slli t6, s0, 31
+; RV64-NEXT:    sd t6, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli t6, s0, 32
+; RV64-NEXT:    sd t6, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli t6, s0, 33
+; RV64-NEXT:    sd t6, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli t6, s0, 34
+; RV64-NEXT:    sd t6, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli t6, s0, 35
+; RV64-NEXT:    sd t6, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli t6, s0, 36
+; RV64-NEXT:    sd t6, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli t6, a7, 32
+; RV64-NEXT:    add a7, a7, t6
+; RV64-NEXT:    slli t6, a6, 32
+; RV64-NEXT:    add a6, a6, t6
+; RV64-NEXT:    slli t6, a5, 32
+; RV64-NEXT:    add a5, a5, t6
+; RV64-NEXT:    slli t6, s0, 37
+; RV64-NEXT:    sd t6, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v18, v18, s4
+; RV64-NEXT:    vand.vx v10, v10, s2
+; RV64-NEXT:    vsll.vi v20, v20, 24
+; RV64-NEXT:    vand.vx v22, v8, s2
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vor.vv v12, v18, v12
+; RV64-NEXT:    vor.vv v10, v10, v16
+; RV64-NEXT:    vsll.vi v16, v22, 8
+; RV64-NEXT:    vsll.vx v8, v8, s1
+; RV64-NEXT:    vor.vv v10, v10, v12
+; RV64-NEXT:    vor.vv v12, v20, v16
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v10, v10, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v10, v10, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v10, v10, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vand.vx v10, v8, t4
+; RV64-NEXT:    slli t4, s0, 38
+; RV64-NEXT:    sd t4, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    slli t3, s0, 39
+; RV64-NEXT:    sd t3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v14, v8, t2
+; RV64-NEXT:    slli t2, s0, 40
+; RV64-NEXT:    sd t2, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v16, v8, t1
+; RV64-NEXT:    slli t1, s0, 41
+; RV64-NEXT:    sd t1, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    vand.vx v18, v8, t0
+; RV64-NEXT:    slli s6, s0, 42
+; RV64-NEXT:    vand.vx v20, v8, a4
+; RV64-NEXT:    slli s7, s0, 43
+; RV64-NEXT:    vand.vx v22, v8, a3
+; RV64-NEXT:    slli s8, s0, 44
+; RV64-NEXT:    vand.vx v24, v8, t5
+; RV64-NEXT:    slli s9, s0, 45
+; RV64-NEXT:    vand.vx v26, v8, a2
+; RV64-NEXT:    slli s10, s0, 46
+; RV64-NEXT:    vand.vx v28, v8, a1
+; RV64-NEXT:    slli s11, s0, 47
+; RV64-NEXT:    vand.vx v30, v8, a0
+; RV64-NEXT:    slli ra, s0, 48
+; RV64-NEXT:    slli s3, s0, 49
+; RV64-NEXT:    slli s1, s0, 50
+; RV64-NEXT:    slli t6, s0, 51
+; RV64-NEXT:    slli t5, s0, 52
+; RV64-NEXT:    slli t4, s0, 53
+; RV64-NEXT:    slli t3, s0, 54
+; RV64-NEXT:    slli t2, s0, 55
+; RV64-NEXT:    slli t1, s0, 56
+; RV64-NEXT:    slli t0, s0, 57
+; RV64-NEXT:    slli a4, s0, 58
+; RV64-NEXT:    slli a3, s0, 59
+; RV64-NEXT:    slli a2, s0, 60
+; RV64-NEXT:    slli a1, s0, 61
+; RV64-NEXT:    slli s0, s0, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v6, v8, 2
+; RV64-NEXT:    vand.vi v4, v8, 1
+; RV64-NEXT:    vand.vi v2, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v6, v8, v6
+; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v4
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v2
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v6, v8, v0
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v14
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v16
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v18
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v20
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v22
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v24
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v26
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v28
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vmul.vv v10, v8, v30
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 8
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 16
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 32
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 6
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 64
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 128
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 256
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 512
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 1024
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 2048
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 8192
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 16384
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 32768
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 65536
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 131072
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    lui s5, 262144
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 112
+; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv a5, s5
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    ld s5, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v10, v8, s5
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s6
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s7
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s8
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s9
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s10
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s11
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, ra
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 112
+; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s3
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    addi s3, sp, 112
+; RV64-NEXT:    vs2r.v v10, (s3) # vscale x 16-byte Folded Spill
+; RV64-NEXT:    vand.vx v10, v8, s1
+; RV64-NEXT:    vmul.vv v4, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t6
+; RV64-NEXT:    vmul.vv v6, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t5
+; RV64-NEXT:    vmul.vv v30, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t4
+; RV64-NEXT:    vmul.vv v28, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t3
+; RV64-NEXT:    vmul.vv v26, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t2
+; RV64-NEXT:    vmul.vv v24, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t1
+; RV64-NEXT:    vmul.vv v22, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, t0
+; RV64-NEXT:    vmul.vv v20, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, a4
+; RV64-NEXT:    vmul.vv v18, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, a3
+; RV64-NEXT:    vmul.vv v16, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, a2
+; RV64-NEXT:    vmul.vv v14, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, a1
+; RV64-NEXT:    vmul.vv v12, v8, v10
+; RV64-NEXT:    vand.vx v10, v8, s0
+; RV64-NEXT:    vmul.vv v10, v8, v10
+; RV64-NEXT:    vand.vx v0, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v2, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v2
+; RV64-NEXT:    addi a0, sp, 112
+; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; RV64-NEXT:    vxor.vv v2, v0, v2
+; RV64-NEXT:    vxor.vv v4, v2, v4
+; RV64-NEXT:    vxor.vv v6, v4, v6
+; RV64-NEXT:    vxor.vv v30, v6, v30
+; RV64-NEXT:    vxor.vv v28, v30, v28
+; RV64-NEXT:    vxor.vv v26, v28, v26
+; RV64-NEXT:    vxor.vv v24, v26, v24
+; RV64-NEXT:    vxor.vv v22, v24, v22
+; RV64-NEXT:    vxor.vv v20, v22, v20
+; RV64-NEXT:    vxor.vv v18, v20, v18
+; RV64-NEXT:    vxor.vv v16, v18, v16
+; RV64-NEXT:    vxor.vv v14, v16, v14
+; RV64-NEXT:    vxor.vv v12, v14, v12
+; RV64-NEXT:    vxor.vv v10, v12, v10
+; RV64-NEXT:    vxor.vv v8, v10, v8
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v10, v8, a0
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    vsrl.vx v12, v8, a1
+; RV64-NEXT:    vsrl.vi v14, v8, 24
+; RV64-NEXT:    vsrl.vi v16, v8, 8
+; RV64-NEXT:    vand.vx v12, v12, s4
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vand.vx v12, v8, s2
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v14, v14, a2
+; RV64-NEXT:    vand.vx v16, v16, s2
+; RV64-NEXT:    vor.vv v14, v16, v14
+; RV64-NEXT:    vand.vx v16, v8, a2
+; RV64-NEXT:    vsll.vi v12, v12, 8
+; RV64-NEXT:    vsll.vi v16, v16, 24
+; RV64-NEXT:    vor.vv v12, v16, v12
+; RV64-NEXT:    vsll.vx v16, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vsll.vx v8, v8, a1
+; RV64-NEXT:    vor.vv v8, v16, v8
+; RV64-NEXT:    vor.vv v10, v14, v10
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v10, v10, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v10, v10, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v10, v10, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 224
+; RV64-NEXT:    ret
+  %a = call <4 x i64> @llvm.clmulr.v4i64(<4 x i64> %x, <4 x i64> %y)
+  ret <4 x i64> %a
+}
+
+define <8 x i64> @clmulr_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
+; RV32-LABEL: clmulr_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -352
+; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    lui s11, 1044480
+; RV32-NEXT:    lui s0, 524288
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    li ra, 2
+; RV32-NEXT:    li t4, 4
+; RV32-NEXT:    li t2, 8
+; RV32-NEXT:    li t6, 16
+; RV32-NEXT:    li t5, 32
+; RV32-NEXT:    li t3, 64
+; RV32-NEXT:    li t1, 128
+; RV32-NEXT:    li t0, 256
+; RV32-NEXT:    li a7, 512
+; RV32-NEXT:    li a6, 1024
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    lui a3, 2
+; RV32-NEXT:    lui a2, 4
+; RV32-NEXT:    lui a5, 8
+; RV32-NEXT:    lui s1, 16
+; RV32-NEXT:    lui a1, 32
+; RV32-NEXT:    lui s2, 64
+; RV32-NEXT:    lui s3, 128
+; RV32-NEXT:    lui s4, 256
+; RV32-NEXT:    lui s5, 512
+; RV32-NEXT:    lui s6, 1024
+; RV32-NEXT:    lui s7, 2048
+; RV32-NEXT:    lui s8, 4096
+; RV32-NEXT:    lui s9, 8192
+; RV32-NEXT:    lui s10, 16384
+; RV32-NEXT:    sw s11, 272(sp)
+; RV32-NEXT:    lui s11, 32768
+; RV32-NEXT:    sw zero, 276(sp)
+; RV32-NEXT:    sw s0, 264(sp)
+; RV32-NEXT:    sw zero, 268(sp)
+; RV32-NEXT:    sw zero, 256(sp)
+; RV32-NEXT:    sw a0, 260(sp)
+; RV32-NEXT:    sw zero, 248(sp)
+; RV32-NEXT:    sw ra, 252(sp)
+; RV32-NEXT:    lui ra, 65536
+; RV32-NEXT:    sw zero, 240(sp)
+; RV32-NEXT:    sw t4, 244(sp)
+; RV32-NEXT:    lui t4, 131072
+; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw t2, 236(sp)
+; RV32-NEXT:    lui t2, 262144
+; RV32-NEXT:    sw zero, 224(sp)
+; RV32-NEXT:    sw t6, 228(sp)
+; RV32-NEXT:    sw zero, 216(sp)
+; RV32-NEXT:    sw t5, 220(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw t3, 212(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw t1, 204(sp)
+; RV32-NEXT:    sw zero, 192(sp)
+; RV32-NEXT:    sw t0, 196(sp)
+; RV32-NEXT:    sw zero, 184(sp)
+; RV32-NEXT:    sw a7, 188(sp)
+; RV32-NEXT:    sw zero, 176(sp)
+; RV32-NEXT:    sw a6, 180(sp)
+; RV32-NEXT:    li t1, 1024
+; RV32-NEXT:    slli t6, a0, 11
+; RV32-NEXT:    sw zero, 168(sp)
+; RV32-NEXT:    sw t6, 172(sp)
+; RV32-NEXT:    sw zero, 160(sp)
+; RV32-NEXT:    sw a4, 164(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw a3, 156(sp)
+; RV32-NEXT:    lui t3, 2
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw a2, 148(sp)
+; RV32-NEXT:    lui t5, 4
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw a5, 140(sp)
+; RV32-NEXT:    lui a4, 8
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw s1, 132(sp)
+; RV32-NEXT:    sw zero, 120(sp)
+; RV32-NEXT:    sw a1, 124(sp)
+; RV32-NEXT:    sw zero, 112(sp)
+; RV32-NEXT:    sw s2, 116(sp)
+; RV32-NEXT:    sw zero, 104(sp)
+; RV32-NEXT:    sw s3, 108(sp)
+; RV32-NEXT:    sw zero, 96(sp)
+; RV32-NEXT:    sw s4, 100(sp)
+; RV32-NEXT:    sw zero, 88(sp)
+; RV32-NEXT:    sw s5, 92(sp)
+; RV32-NEXT:    sw zero, 80(sp)
+; RV32-NEXT:    sw s6, 84(sp)
+; RV32-NEXT:    sw zero, 72(sp)
+; RV32-NEXT:    sw s7, 76(sp)
+; RV32-NEXT:    sw zero, 64(sp)
+; RV32-NEXT:    sw s8, 68(sp)
+; RV32-NEXT:    sw zero, 56(sp)
+; RV32-NEXT:    sw s9, 60(sp)
+; RV32-NEXT:    sw zero, 48(sp)
+; RV32-NEXT:    sw s10, 52(sp)
+; RV32-NEXT:    sw zero, 40(sp)
+; RV32-NEXT:    sw s11, 44(sp)
+; RV32-NEXT:    sw zero, 32(sp)
+; RV32-NEXT:    sw ra, 36(sp)
+; RV32-NEXT:    sw zero, 24(sp)
+; RV32-NEXT:    sw t4, 28(sp)
+; RV32-NEXT:    sw zero, 16(sp)
+; RV32-NEXT:    sw t2, 20(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw s0, 12(sp)
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v28, a1
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vmv.v.x v4, a1
+; RV32-NEXT:    addi a1, sp, 272
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v0, (a1), zero
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v0, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li t0, 56
+; RV32-NEXT:    vsrl.vi v20, v8, 24
+; RV32-NEXT:    vsrl.vx v12, v8, t0
+; RV32-NEXT:    li a6, 40
+; RV32-NEXT:    vsrl.vx v16, v8, a6
+; RV32-NEXT:    vsll.vx v24, v8, t0
+; RV32-NEXT:    addi a3, s1, -256
+; RV32-NEXT:    vand.vx v16, v16, a3
+; RV32-NEXT:    vor.vv v16, v16, v12
+; RV32-NEXT:    vand.vx v12, v8, a3
+; RV32-NEXT:    vsll.vx v12, v12, a6
+; RV32-NEXT:    vor.vv v12, v24, v12
+; RV32-NEXT:    vsrl.vi v24, v8, 8
+; RV32-NEXT:    lui a5, 4080
+; RV32-NEXT:    vand.vx v20, v20, a5
+; RV32-NEXT:    lui a7, 349525
+; RV32-NEXT:    addi a7, a7, 1365
+; RV32-NEXT:    vand.vv v24, v24, v0
+; RV32-NEXT:    vor.vv v20, v24, v20
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a7
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vor.vv v16, v20, v16
+; RV32-NEXT:    vand.vx v20, v8, a5
+; RV32-NEXT:    vsll.vi v20, v20, 24
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v20, v8
+; RV32-NEXT:    addi a7, sp, 264
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    addi a7, sp, 256
+; RV32-NEXT:    vlse64.v v12, (a7), zero
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v28
+; RV32-NEXT:    vand.vv v16, v16, v28
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v4
+; RV32-NEXT:    vand.vv v16, v16, v4
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    addi a7, sp, 248
+; RV32-NEXT:    vlse64.v v16, (a7), zero
+; RV32-NEXT:    vand.vv v28, v8, v20
+; RV32-NEXT:    addi a7, sp, 240
+; RV32-NEXT:    addi a0, sp, 232
+; RV32-NEXT:    vlse64.v v20, (a7), zero
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vand.vv v4, v8, v12
+; RV32-NEXT:    vand.vv v0, v8, v16
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a2, sp, 224
+; RV32-NEXT:    addi a7, sp, 216
+; RV32-NEXT:    addi a1, sp, 208
+; RV32-NEXT:    addi a0, sp, 200
+; RV32-NEXT:    vlse64.v v12, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a7), zero
+; RV32-NEXT:    vlse64.v v20, (a1), zero
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    addi a1, sp, 184
+; RV32-NEXT:    addi a2, sp, 176
+; RV32-NEXT:    addi a7, sp, 168
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 160
+; RV32-NEXT:    addi a1, sp, 152
+; RV32-NEXT:    addi a2, sp, 144
+; RV32-NEXT:    addi a7, sp, 136
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    addi a1, sp, 120
+; RV32-NEXT:    addi a2, sp, 112
+; RV32-NEXT:    addi a7, sp, 104
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 96
+; RV32-NEXT:    addi a1, sp, 88
+; RV32-NEXT:    addi a2, sp, 80
+; RV32-NEXT:    addi a7, sp, 72
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    addi a1, sp, 56
+; RV32-NEXT:    addi a2, sp, 48
+; RV32-NEXT:    addi a7, sp, 40
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    addi a0, sp, 32
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    addi a7, sp, 8
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v20, (a2), zero
+; RV32-NEXT:    vlse64.v v24, (a7), zero
+; RV32-NEXT:    vand.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v20
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vv v12, v8, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vi v12, v8, 8
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 128
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 256
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    li a0, 512
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    lui a0, 1
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, a4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s1
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    lui a0, 32
+; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s3
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s5
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s6
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s7
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s8
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s9
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s10
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, s11
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, ra
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t4
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vand.vx v12, v8, t2
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    vmul.vv v16, v8, v28
+; RV32-NEXT:    vmul.vv v20, v8, v4
+; RV32-NEXT:    vmul.vv v24, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v28, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v28, v8, v28
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v4, v8, v4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v0, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 7
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v12, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vi v8, v8, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    addi a0, sp, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vxor.vv v8, v8, v16
+; RV32-NEXT:    vxor.vv v8, v8, v20
+; RV32-NEXT:    vxor.vv v8, v8, v24
+; RV32-NEXT:    vxor.vv v8, v8, v28
+; RV32-NEXT:    vxor.vv v8, v8, v4
+; RV32-NEXT:    vxor.vv v8, v8, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vxor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vx v12, v8, t0
+; RV32-NEXT:    vsrl.vx v16, v8, a6
+; RV32-NEXT:    vsrl.vi v20, v8, 24
+; RV32-NEXT:    vand.vx v16, v16, a3
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    vand.vx v20, v20, a5
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vor.vv v16, v16, v20
+; RV32-NEXT:    vand.vx v20, v8, a5
+; RV32-NEXT:    vand.vv v24, v8, v24
+; RV32-NEXT:    vsll.vi v24, v24, 8
+; RV32-NEXT:    vsll.vi v20, v20, 24
+; RV32-NEXT:    vor.vv v20, v20, v24
+; RV32-NEXT:    vsll.vx v24, v8, t0
+; RV32-NEXT:    vand.vx v8, v8, a3
+; RV32-NEXT:    vsll.vx v8, v8, a6
+; RV32-NEXT:    vor.vv v8, v24, v8
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vor.vv v8, v8, v20
+; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 288
+; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 352
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: clmulr_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -240
+; RV64-NEXT:    sd ra, 232(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 224(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 216(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 208(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 200(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s5, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s6, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s7, 168(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s8, 160(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s9, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s10, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s11, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    li a6, 56
+; RV64-NEXT:    li t0, 40
+; RV64-NEXT:    lui t1, 16
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vsrl.vi v20, v8, 24
+; RV64-NEXT:    lui a7, 4080
+; RV64-NEXT:    vsrl.vi v12, v8, 8
+; RV64-NEXT:    li s0, 255
+; RV64-NEXT:    lui a3, 61681
+; RV64-NEXT:    lui a4, 209715
+; RV64-NEXT:    lui a5, 349525
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    li s9, 1
+; RV64-NEXT:    vsrl.vx v16, v8, a6
+; RV64-NEXT:    vsrl.vx v28, v8, t0
+; RV64-NEXT:    addi s4, t1, -256
+; RV64-NEXT:    vand.vx v24, v20, a7
+; RV64-NEXT:    slli s0, s0, 24
+; RV64-NEXT:    vand.vx v4, v8, a7
+; RV64-NEXT:    vsll.vx v20, v8, a6
+; RV64-NEXT:    addi a7, a3, -241
+; RV64-NEXT:    addi a6, a4, 819
+; RV64-NEXT:    addi a5, a5, 1365
+; RV64-NEXT:    slli a3, s9, 11
+; RV64-NEXT:    sd a3, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 31
+; RV64-NEXT:    sd a3, 104(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 32
+; RV64-NEXT:    sd a3, 96(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 33
+; RV64-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 34
+; RV64-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 35
+; RV64-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 36
+; RV64-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 37
+; RV64-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 38
+; RV64-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 39
+; RV64-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 40
+; RV64-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli a3, s9, 41
+; RV64-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    slli s6, s9, 42
+; RV64-NEXT:    slli s7, s9, 43
+; RV64-NEXT:    slli a3, a7, 32
+; RV64-NEXT:    add a7, a7, a3
+; RV64-NEXT:    slli a3, a6, 32
+; RV64-NEXT:    add a6, a6, a3
+; RV64-NEXT:    slli a3, a5, 32
+; RV64-NEXT:    add a5, a5, a3
+; RV64-NEXT:    slli s8, s9, 44
+; RV64-NEXT:    vand.vx v28, v28, s4
+; RV64-NEXT:    vand.vx v12, v12, s0
+; RV64-NEXT:    vsll.vi v4, v4, 24
+; RV64-NEXT:    vand.vx v0, v8, s0
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vor.vv v16, v28, v16
+; RV64-NEXT:    vor.vv v12, v12, v24
+; RV64-NEXT:    vsll.vi v24, v0, 8
+; RV64-NEXT:    vsll.vx v8, v8, t0
+; RV64-NEXT:    vor.vv v12, v12, v16
+; RV64-NEXT:    vor.vv v16, v4, v24
+; RV64-NEXT:    vor.vv v8, v20, v8
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v12, v12, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v12, v12, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v12, v12, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    slli s10, s9, 45
+; RV64-NEXT:    vand.vx v16, v8, a1
+; RV64-NEXT:    slli s11, s9, 46
+; RV64-NEXT:    vand.vx v20, v8, a0
+; RV64-NEXT:    slli ra, s9, 47
+; RV64-NEXT:    slli s3, s9, 48
+; RV64-NEXT:    slli s2, s9, 49
+; RV64-NEXT:    slli s1, s9, 50
+; RV64-NEXT:    slli t6, s9, 51
+; RV64-NEXT:    slli t5, s9, 52
+; RV64-NEXT:    slli t4, s9, 53
+; RV64-NEXT:    slli t3, s9, 54
+; RV64-NEXT:    slli t2, s9, 55
+; RV64-NEXT:    slli t1, s9, 56
+; RV64-NEXT:    slli t0, s9, 57
+; RV64-NEXT:    slli a4, s9, 58
+; RV64-NEXT:    slli a3, s9, 59
+; RV64-NEXT:    slli a2, s9, 60
+; RV64-NEXT:    slli a1, s9, 61
+; RV64-NEXT:    slli s9, s9, 62
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    vand.vi v24, v8, 2
+; RV64-NEXT:    vand.vi v28, v8, 1
+; RV64-NEXT:    vand.vi v4, v8, 4
+; RV64-NEXT:    vand.vi v0, v8, 8
+; RV64-NEXT:    vmul.vv v24, v8, v24
+; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v28
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v4
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v24, v8, v0
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v16
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vmul.vv v12, v8, v20
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 128
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 256
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 512
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 6
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    li s5, 1024
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 1
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 2
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 4
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 8
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 16
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 32
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 64
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 128
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 256
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 512
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 1024
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 2048
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 4096
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 8192
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 7
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 16384
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 32768
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 65536
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 131072
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    lui s5, 262144
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 5
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 2
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 1
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    mv s5, a5
+; RV64-NEXT:    slli a5, a5, 3
+; RV64-NEXT:    add a5, a5, s5
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 128
+; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv a5, s5
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    add s5, s5, a5
+; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    vand.vx v12, v8, s5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s7
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s8
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s10
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 4
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s11
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 1
+; RV64-NEXT:    add s6, s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, ra
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s5, vlenb
+; RV64-NEXT:    slli s5, s5, 3
+; RV64-NEXT:    mv s6, s5
+; RV64-NEXT:    slli s5, s5, 2
+; RV64-NEXT:    add s5, s5, s6
+; RV64-NEXT:    add s5, sp, s5
+; RV64-NEXT:    addi s5, s5, 128
+; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s3, vlenb
+; RV64-NEXT:    slli s3, s3, 2
+; RV64-NEXT:    mv s5, s3
+; RV64-NEXT:    slli s3, s3, 3
+; RV64-NEXT:    add s3, s3, s5
+; RV64-NEXT:    add s3, sp, s3
+; RV64-NEXT:    addi s3, s3, 128
+; RV64-NEXT:    vs4r.v v12, (s3) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s2, vlenb
+; RV64-NEXT:    slli s2, s2, 5
+; RV64-NEXT:    add s2, sp, s2
+; RV64-NEXT:    addi s2, s2, 128
+; RV64-NEXT:    vs4r.v v12, (s2) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, s1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr s1, vlenb
+; RV64-NEXT:    slli s1, s1, 2
+; RV64-NEXT:    mv s2, s1
+; RV64-NEXT:    slli s1, s1, 1
+; RV64-NEXT:    add s2, s2, s1
+; RV64-NEXT:    slli s1, s1, 1
+; RV64-NEXT:    add s1, s1, s2
+; RV64-NEXT:    add s1, sp, s1
+; RV64-NEXT:    addi s1, s1, 128
+; RV64-NEXT:    vs4r.v v12, (s1) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t6
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t6, vlenb
+; RV64-NEXT:    slli t6, t6, 3
+; RV64-NEXT:    mv s1, t6
+; RV64-NEXT:    slli t6, t6, 1
+; RV64-NEXT:    add t6, t6, s1
+; RV64-NEXT:    add t6, sp, t6
+; RV64-NEXT:    addi t6, t6, 128
+; RV64-NEXT:    vs4r.v v12, (t6) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t5
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t5, vlenb
+; RV64-NEXT:    slli t5, t5, 2
+; RV64-NEXT:    mv t6, t5
+; RV64-NEXT:    slli t5, t5, 2
+; RV64-NEXT:    add t5, t5, t6
+; RV64-NEXT:    add t5, sp, t5
+; RV64-NEXT:    addi t5, t5, 128
+; RV64-NEXT:    vs4r.v v12, (t5) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t4
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t4, vlenb
+; RV64-NEXT:    slli t4, t4, 4
+; RV64-NEXT:    add t4, sp, t4
+; RV64-NEXT:    addi t4, t4, 128
+; RV64-NEXT:    vs4r.v v12, (t4) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t3
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t3, vlenb
+; RV64-NEXT:    slli t3, t3, 2
+; RV64-NEXT:    mv t4, t3
+; RV64-NEXT:    slli t3, t3, 1
+; RV64-NEXT:    add t3, t3, t4
+; RV64-NEXT:    add t3, sp, t3
+; RV64-NEXT:    addi t3, t3, 128
+; RV64-NEXT:    vs4r.v v12, (t3) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t2
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t2, vlenb
+; RV64-NEXT:    slli t2, t2, 3
+; RV64-NEXT:    add t2, sp, t2
+; RV64-NEXT:    addi t2, t2, 128
+; RV64-NEXT:    vs4r.v v12, (t2) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t1
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    csrr t1, vlenb
+; RV64-NEXT:    slli t1, t1, 2
+; RV64-NEXT:    add t1, sp, t1
+; RV64-NEXT:    addi t1, t1, 128
+; RV64-NEXT:    vs4r.v v12, (t1) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, t0
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    addi t0, sp, 128
+; RV64-NEXT:    vs4r.v v12, (t0) # vscale x 32-byte Folded Spill
+; RV64-NEXT:    vand.vx v12, v8, a4
+; RV64-NEXT:    vmul.vv v28, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, a3
+; RV64-NEXT:    vmul.vv v24, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, a2
+; RV64-NEXT:    vmul.vv v20, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, a1
+; RV64-NEXT:    vmul.vv v16, v8, v12
+; RV64-NEXT:    vand.vx v12, v8, s9
+; RV64-NEXT:    vmul.vv v12, v8, v12
+; RV64-NEXT:    vand.vx v0, v8, a0
+; RV64-NEXT:    vmul.vv v8, v8, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v4, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 7
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v0, v0, v4
+; RV64-NEXT:    addi a0, sp, 128
+; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; RV64-NEXT:    vxor.vv v4, v0, v4
+; RV64-NEXT:    vxor.vv v28, v4, v28
+; RV64-NEXT:    vxor.vv v24, v28, v24
+; RV64-NEXT:    vxor.vv v20, v24, v20
+; RV64-NEXT:    vxor.vv v16, v20, v16
+; RV64-NEXT:    vxor.vv v12, v16, v12
+; RV64-NEXT:    vxor.vv v8, v12, v8
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v12, v8, a0
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    vsrl.vx v16, v8, a1
+; RV64-NEXT:    vsrl.vi v20, v8, 24
+; RV64-NEXT:    vsrl.vi v24, v8, 8
+; RV64-NEXT:    vand.vx v16, v16, s4
+; RV64-NEXT:    vor.vv v12, v16, v12
+; RV64-NEXT:    vand.vx v16, v8, s0
+; RV64-NEXT:    lui a2, 4080
+; RV64-NEXT:    vand.vx v20, v20, a2
+; RV64-NEXT:    vand.vx v24, v24, s0
+; RV64-NEXT:    vor.vv v20, v24, v20
+; RV64-NEXT:    vand.vx v24, v8, a2
+; RV64-NEXT:    vsll.vi v16, v16, 8
+; RV64-NEXT:    vsll.vi v24, v24, 24
+; RV64-NEXT:    vor.vv v16, v24, v16
+; RV64-NEXT:    vsll.vx v24, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, s4
+; RV64-NEXT:    vsll.vx v8, v8, a1
+; RV64-NEXT:    vor.vv v8, v24, v8
+; RV64-NEXT:    vor.vv v12, v20, v12
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vand.vx v8, v8, a7
+; RV64-NEXT:    vand.vx v12, v12, a7
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a6
+; RV64-NEXT:    vand.vx v12, v12, a6
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vx v8, v8, a5
+; RV64-NEXT:    vand.vx v12, v12, a5
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v12, v8
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a1, a1, a0
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    ld ra, 232(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 224(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 216(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 208(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s6, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s7, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s8, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s9, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s10, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s11, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 240
+; RV64-NEXT:    ret
+  %a = call <8 x i64> @llvm.clmulr.v8i64(<8 x i64> %x, <8 x i64> %y)
+  ret <8 x i64> %a
+}

>From 0e5e1c48aedac39dfef9303294cc05adf7fd842c Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 19 Nov 2025 22:11:16 +0000
Subject: [PATCH 2/5] [ISel] Updates to const-fold tests

---
 llvm/test/CodeGen/RISCV/clmul.ll | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
index 1e3acd8ccce74..f997e56d91178 100644
--- a/llvm/test/CodeGen/RISCV/clmul.ll
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -3233,17 +3233,11 @@ define i4 @clmul_constfold_i4() nounwind {
 }
 
 define i16 @clmul_constfold_i16() nounwind {
-; RV32IM-LABEL: clmul_constfold_i16:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lui a0, 699051
-; RV32IM-NEXT:    addi a0, a0, -1366
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmul_constfold_i16:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV64IM-NEXT:    ld a0, %lo(.LCPI6_0)(a0)
-; RV64IM-NEXT:    ret
+; CHECK-LABEL: clmul_constfold_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addi a0, a0, -1366
+; CHECK-NEXT:    ret
   %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1)
   ret i16 %res
 }
@@ -7566,17 +7560,11 @@ define i4 @clmulr_constfold_i4() nounwind {
 }
 
 define i16 @clmulr_constfold_i16() nounwind {
-; RV32IM-LABEL: clmulr_constfold_i16:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lui a0, 699051
-; RV32IM-NEXT:    addi a0, a0, -1366
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmulr_constfold_i16:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64IM-NEXT:    ld a0, %lo(.LCPI13_0)(a0)
-; RV64IM-NEXT:    ret
+; CHECK-LABEL: clmulr_constfold_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addi a0, a0, -1365
+; CHECK-NEXT:    ret
   %res = call i16 @llvm.clmulr.i16(i16 -2, i16 -1)
   ret i16 %res
 }
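
(Side note, not part of the patch: the folded constants in the new CHECK lines can be reproduced with a tiny reference carry-less multiply. The code below is a standalone sketch; the helper names are illustrative only, not anything from LLVM.)

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reference 16x16 -> 32-bit carry-less product. */
static uint32_t clmul_full16(uint16_t a, uint16_t b) {
  uint32_t acc = 0;
  for (int i = 0; i < 16; ++i)
    if ((a >> i) & 1)
      acc ^= (uint32_t)b << i;
  return acc;
}

int main(void) {
  uint32_t full = clmul_full16((uint16_t)-2, (uint16_t)-1);
  uint16_t lo = (uint16_t)full;         /* llvm.clmul.i16(-2, -1): low 16 bits */
  uint16_t r = (uint16_t)(full >> 15);  /* llvm.clmulr.i16(-2, -1): bits 15..30 */
  printf("clmul  = 0x%04x\n", lo);      /* 0xaaaa = 43690 = (11 << 12) - 1366 */
  printf("clmulr = 0x%04x\n", r);       /* 0xaaab = 43691 = (11 << 12) - 1365 */
  assert(lo == 0xaaaa && r == 0xaaab);
  return 0;
}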

>From 244a3d842ab8ed7b9905cf83eff6aceae5c46d5a Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 20 Nov 2025 09:15:44 +0000
Subject: [PATCH 3/5] [ISel] Strip llvm.clmulr

---
 llvm/docs/LangRef.rst                         |    48 -
 llvm/include/llvm/IR/Intrinsics.td            |     2 -
 .../SelectionDAG/SelectionDAGBuilder.cpp      |    18 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |    11 +-
 llvm/test/CodeGen/RISCV/clmul.ll              |  4327 ----
 llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll   | 18639 ----------------
 .../CodeGen/RISCV/rvv/fixed-vectors-clmul.ll  | 14737 ------------
 7 files changed, 7 insertions(+), 37775 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index a33e2bdceafb8..4fdca9c1a4dbc 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18436,54 +18436,6 @@ Example:
       %r = call i4 @llvm.clmul.i4(i4 -4, i4 2)   ; %r = -8
       %r = call i4 @llvm.clmul.i4(i4 -4, i4 -5)  ; %r = 4
 
-'``llvm.clmulr.*``' Intrinsic
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-
-This is an overloaded intrinsic. You can use ``llvm.clmulr`` on any integer
-or vectors of integer elements.
-
-::
-
-      declare i16 @llvm.clmulr.i16(i16 %a, i16 %b)
-      declare i32 @llvm.clmulr.i32(i32 %a, i32 %b)
-      declare i64 @llvm.clmulr.i64(i64 %a, i64 %b)
-      declare <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %a, <4 x i32> %b)
-
-Overview:
-"""""""""
-
-The '``llvm.clmulr``' family of intrinsic functions performs reversed
-carry-less multiplication on the two arguments.
-
-Arguments:
-""""""""""
-
-The arguments may be any integer type or vector of integer type. Both arguments
-and result must have the same type.
-
-Semantics:
-""""""""""
-
-The '``llvm.clmulr``' intrinsic computes reversed carry-less multiply of its
-arguments. The vector variants operate lane-wise.
-
-.. code-block:: text
-
-      clmulr(%a, %b) = bitreverse(clmul(bitreverse(%a), bitreverse(%b)))
-
-Example:
-""""""""
-
-.. code-block:: llvm
-
-      %r = call i4 @llvm.clmulr.i4(i4 1, i4 2)    ; %r = 0
-      %r = call i4 @llvm.clmulr.i4(i4 5, i4 6)    ; %r = 3
-      %r = call i4 @llvm.clmulr.i4(i4 -4, i4 2)   ; %r = 3
-      %r = call i4 @llvm.clmulr.i4(i4 -4, i4 -5)  ; %r = -2
-
 .. _int_overflow:
 
 Arithmetic with Overflow Intrinsics
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index fb8857cec2075..f0aed94529cfb 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1467,8 +1467,6 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in
       [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
   def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
       [LLVMMatchType<0>, LLVMMatchType<0>]>;
-  def int_clmulr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
-      [LLVMMatchType<0>, LLVMMatchType<0>]>;
 }
 
 let IntrProperties = [IntrNoMem, IntrSpeculatable,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 428eaeb3a1dde..27129b37e1922 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7279,20 +7279,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     }
     return;
   }
-  case Intrinsic::clmul:
-  case Intrinsic::clmulr: {
-    SDValue Op1 = getValue(I.getArgOperand(0));
-    SDValue Op2 = getValue(I.getArgOperand(1));
-    unsigned Opcode;
-    switch (Intrinsic) {
-    case Intrinsic::clmul:
-      Opcode = ISD::CLMUL;
-      break;
-    case Intrinsic::clmulr:
-      Opcode = ISD::CLMULR;
-      break;
-    }
-    setValue(&I, DAG.getNode(Opcode, sdl, Op1.getValueType(), Op1, Op2));
+  case Intrinsic::clmul: {
+    SDValue X = getValue(I.getArgOperand(0));
+    SDValue Y = getValue(I.getArgOperand(1));
+    setValue(&I, DAG.getNode(ISD::CLMUL, sdl, X.getValueType(), X, Y));
     return;
   }
   case Intrinsic::sadd_sat: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7db1dad5b4426..79627466bad0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8324,20 +8324,15 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
     }
     break;
   }
-  case ISD::CLMULR: {
-    SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
-    SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
-    SDValue ResR = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
-    Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ResR);
-    break;
-  }
+  case ISD::CLMULR:
   case ISD::CLMULH: {
     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW);
     SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
     SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
     SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
+    unsigned ShtAmt = Node->getOpcode() == ISD::CLMULR ? BW - 1 : BW;
     SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
-                                 DAG.getShiftAmountConstant(BW, VT, DL));
+                                 DAG.getShiftAmountConstant(ShtAmt, VT, DL));
     Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
     break;
   }
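
(Aside, not code from the patch: the unified CLMULR/CLMULH expansion above relies on both results being slices of the carry-less product computed at twice the bit width, shifted right by BW-1 and BW respectively. Below is a minimal standalone C check of that identity for 8-bit operands, with illustrative helper names; the clmulr reference follows the bitreverse definition quoted in the removed LangRef text.)

#include <assert.h>
#include <stdint.h>

/* Reference 8x8 -> 16-bit carry-less product. */
static uint16_t clmul_full8(uint8_t a, uint8_t b) {
  uint16_t acc = 0;
  for (int i = 0; i < 8; ++i)
    if ((a >> i) & 1)
      acc ^= (uint16_t)b << i;
  return acc;
}

static uint8_t bitrev8(uint8_t x) {
  uint8_t r = 0;
  for (int i = 0; i < 8; ++i)
    r |= (uint8_t)(((x >> i) & 1) << (7 - i));
  return r;
}

int main(void) {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      uint16_t full = clmul_full8((uint8_t)a, (uint8_t)b);
      uint8_t r = (uint8_t)(full >> 7); /* CLMULR: widened product >> (BW - 1) */
      uint8_t h = (uint8_t)(full >> 8); /* CLMULH: widened product >> BW       */
      /* clmulr as defined via bitreverse in the removed LangRef section. */
      uint8_t ref = bitrev8((uint8_t)clmul_full8(bitrev8((uint8_t)a),
                                                 bitrev8((uint8_t)b)));
      assert(r == ref);
      assert(h == (uint8_t)(r >> 1)); /* bit 2*BW-1 of the product is always zero */
    }
  }
  return 0;
}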
diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
index f997e56d91178..da4f4d3075133 100644
--- a/llvm/test/CodeGen/RISCV/clmul.ll
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -3241,4330 +3241,3 @@ define i16 @clmul_constfold_i16() nounwind {
   %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1)
   ret i16 %res
 }
-
-define i4 @clmulr_i4(i4 %a, i4 %b) nounwind {
-; RV32IM-LABEL: clmulr_i4:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    addi sp, sp, -144
-; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    srli a3, a0, 8
-; RV32IM-NEXT:    lui s9, 16
-; RV32IM-NEXT:    srli a4, a0, 24
-; RV32IM-NEXT:    slli a2, a0, 24
-; RV32IM-NEXT:    lui a7, 61681
-; RV32IM-NEXT:    lui ra, 209715
-; RV32IM-NEXT:    lui a1, 349525
-; RV32IM-NEXT:    li s0, 1
-; RV32IM-NEXT:    lui t1, 1
-; RV32IM-NEXT:    lui t2, 2
-; RV32IM-NEXT:    lui t3, 4
-; RV32IM-NEXT:    lui t4, 8
-; RV32IM-NEXT:    lui t0, 32
-; RV32IM-NEXT:    lui a6, 64
-; RV32IM-NEXT:    lui a5, 128
-; RV32IM-NEXT:    lui s1, 256
-; RV32IM-NEXT:    lui t5, 512
-; RV32IM-NEXT:    lui t6, 1024
-; RV32IM-NEXT:    lui s4, 2048
-; RV32IM-NEXT:    lui s2, 4096
-; RV32IM-NEXT:    lui s3, 8192
-; RV32IM-NEXT:    lui s7, 16384
-; RV32IM-NEXT:    lui s5, 32768
-; RV32IM-NEXT:    lui s6, 65536
-; RV32IM-NEXT:    lui s11, 131072
-; RV32IM-NEXT:    lui s8, 262144
-; RV32IM-NEXT:    addi s10, s9, -256
-; RV32IM-NEXT:    and a3, a3, s10
-; RV32IM-NEXT:    or a3, a3, a4
-; RV32IM-NEXT:    addi a7, a7, -241
-; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a4, ra, 819
-; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a1, a1, 1365
-; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    slli s0, s0, 11
-; RV32IM-NEXT:    and a0, a0, s10
-; RV32IM-NEXT:    slli a0, a0, 8
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    srli a2, a0, 4
-; RV32IM-NEXT:    and a0, a0, a7
-; RV32IM-NEXT:    and a2, a2, a7
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 2
-; RV32IM-NEXT:    and a0, a0, a4
-; RV32IM-NEXT:    and a2, a2, a4
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 1
-; RV32IM-NEXT:    and a0, a0, a1
-; RV32IM-NEXT:    and a2, a2, a1
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a3, a2, a0
-; RV32IM-NEXT:    andi a0, a3, 2
-; RV32IM-NEXT:    andi a1, a3, 1
-; RV32IM-NEXT:    and a4, a3, s0
-; RV32IM-NEXT:    and a7, a3, t1
-; RV32IM-NEXT:    and s0, a3, t2
-; RV32IM-NEXT:    and ra, a3, t3
-; RV32IM-NEXT:    and a2, a3, t4
-; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s9
-; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t0
-; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a6, a3, a6
-; RV32IM-NEXT:    and a5, a3, a5
-; RV32IM-NEXT:    and s1, a3, s1
-; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t5
-; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and t6, a3, t6
-; RV32IM-NEXT:    and a2, a3, s4
-; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and s2, a3, s2
-; RV32IM-NEXT:    and a2, a3, s3
-; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s7
-; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s5
-; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s6
-; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s11
-; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s8
-; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a2, 524288
-; RV32IM-NEXT:    and a2, a3, a2
-; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a0, a3, 4
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a1, a3, 8
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a2, a3, 16
-; RV32IM-NEXT:    mul s9, a3, a2
-; RV32IM-NEXT:    andi t0, a3, 32
-; RV32IM-NEXT:    mul s6, a3, t0
-; RV32IM-NEXT:    andi t1, a3, 64
-; RV32IM-NEXT:    mul a0, a3, t1
-; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 128
-; RV32IM-NEXT:    mul a0, a3, t2
-; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 256
-; RV32IM-NEXT:    mul s1, a3, t2
-; RV32IM-NEXT:    andi t3, a3, 512
-; RV32IM-NEXT:    mul t5, a3, t3
-; RV32IM-NEXT:    andi t4, a3, 1024
-; RV32IM-NEXT:    mul s5, a3, t4
-; RV32IM-NEXT:    mul s8, a3, a4
-; RV32IM-NEXT:    mul a0, a3, a7
-; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul t2, a3, s0
-; RV32IM-NEXT:    mul a7, a3, ra
-; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s0, a3, a0
-; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s4, a3, a0
-; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s7, a3, a0
-; RV32IM-NEXT:    mul a0, a3, a6
-; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a6, a3, a5
-; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a3, a0
-; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t1, a3, a0
-; RV32IM-NEXT:    mul t4, a3, t6
-; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s3, a3, a0
-; RV32IM-NEXT:    mul a2, a3, s2
-; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a1, a3, a0
-; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a5, a3, a0
-; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t0, a3, a0
-; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t3, a3, a0
-; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t6, a3, a0
-; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s2, a3, a0
-; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a3, a3, a0
-; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, s11, a0
-; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s11, s11, ra
-; RV32IM-NEXT:    xor s6, s9, s6
-; RV32IM-NEXT:    xor t5, s1, t5
-; RV32IM-NEXT:    xor a7, t2, a7
-; RV32IM-NEXT:    xor a4, a6, a4
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    xor a0, a0, s11
-; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, s6, a2
-; RV32IM-NEXT:    xor a6, t5, s5
-; RV32IM-NEXT:    xor a7, a7, s0
-; RV32IM-NEXT:    xor a4, a4, t1
-; RV32IM-NEXT:    xor a1, a1, a5
-; RV32IM-NEXT:    xor a0, a0, a2
-; RV32IM-NEXT:    xor a2, a6, s8
-; RV32IM-NEXT:    xor a5, a7, s4
-; RV32IM-NEXT:    xor a4, a4, t4
-; RV32IM-NEXT:    xor a1, a1, t0
-; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, a0, a6
-; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a6
-; RV32IM-NEXT:    xor a5, a5, s7
-; RV32IM-NEXT:    xor a4, a4, s3
-; RV32IM-NEXT:    xor a1, a1, t3
-; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, a5, a6
-; RV32IM-NEXT:    xor a1, a1, t6
-; RV32IM-NEXT:    xor a2, a0, a2
-; RV32IM-NEXT:    xor a2, a2, a5
-; RV32IM-NEXT:    slli a0, a0, 24
-; RV32IM-NEXT:    xor a1, a1, s2
-; RV32IM-NEXT:    xor a2, a2, a4
-; RV32IM-NEXT:    xor a1, a1, a3
-; RV32IM-NEXT:    and a3, a2, s10
-; RV32IM-NEXT:    srli a4, a2, 8
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    slli a3, a3, 8
-; RV32IM-NEXT:    and a2, a4, s10
-; RV32IM-NEXT:    srli a1, a1, 24
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    or a0, a0, a1
-; RV32IM-NEXT:    srli a1, a0, 4
-; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 2
-; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 1
-; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    addi sp, sp, 144
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmulr_i4:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    addi sp, sp, -448
-; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srli a2, a0, 24
-; RV64IM-NEXT:    srli a6, a0, 8
-; RV64IM-NEXT:    li a3, 255
-; RV64IM-NEXT:    srli a5, a0, 40
-; RV64IM-NEXT:    lui s3, 16
-; RV64IM-NEXT:    srli s0, a0, 56
-; RV64IM-NEXT:    srliw t2, a0, 24
-; RV64IM-NEXT:    slli t0, a0, 56
-; RV64IM-NEXT:    lui t3, 61681
-; RV64IM-NEXT:    lui t4, 209715
-; RV64IM-NEXT:    lui t6, 349525
-; RV64IM-NEXT:    li a7, 1
-; RV64IM-NEXT:    lui s5, 2
-; RV64IM-NEXT:    lui t1, 4
-; RV64IM-NEXT:    lui a4, 128
-; RV64IM-NEXT:    lui s7, 256
-; RV64IM-NEXT:    lui s8, 4096
-; RV64IM-NEXT:    lui s10, 8192
-; RV64IM-NEXT:    lui a1, 4080
-; RV64IM-NEXT:    and a2, a2, a1
-; RV64IM-NEXT:    slli a3, a3, 24
-; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    addi s1, s3, -256
-; RV64IM-NEXT:    and t5, a0, a1
-; RV64IM-NEXT:    slli a1, t2, 32
-; RV64IM-NEXT:    addi s9, t3, -241
-; RV64IM-NEXT:    addi t4, t4, 819
-; RV64IM-NEXT:    addi t2, t6, 1365
-; RV64IM-NEXT:    slli t3, a7, 11
-; RV64IM-NEXT:    slli s11, a7, 32
-; RV64IM-NEXT:    slli ra, a7, 33
-; RV64IM-NEXT:    slli t6, a7, 34
-; RV64IM-NEXT:    slli s2, a7, 35
-; RV64IM-NEXT:    slli s4, a7, 36
-; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a6, a3
-; RV64IM-NEXT:    or a2, a3, a2
-; RV64IM-NEXT:    slli a3, a7, 37
-; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a5, s1
-; RV64IM-NEXT:    or a3, a3, s0
-; RV64IM-NEXT:    slli a5, a7, 38
-; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t5, t5, 24
-; RV64IM-NEXT:    and a0, a0, s1
-; RV64IM-NEXT:    or a1, t5, a1
-; RV64IM-NEXT:    slli a5, s9, 32
-; RV64IM-NEXT:    add a5, s9, a5
-; RV64IM-NEXT:    slli s0, t4, 32
-; RV64IM-NEXT:    add t4, t4, s0
-; RV64IM-NEXT:    slli s4, t2, 32
-; RV64IM-NEXT:    slli a0, a0, 40
-; RV64IM-NEXT:    add t2, t2, s4
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a0, t0, a0
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, a5
-; RV64IM-NEXT:    and a1, a1, a5
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t4
-; RV64IM-NEXT:    and a1, a1, t4
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t2
-; RV64IM-NEXT:    and a1, a1, t2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or t0, a1, a0
-; RV64IM-NEXT:    andi a0, t0, 2
-; RV64IM-NEXT:    andi a1, t0, 1
-; RV64IM-NEXT:    andi a2, t0, 4
-; RV64IM-NEXT:    andi a3, t0, 8
-; RV64IM-NEXT:    andi a5, t0, 16
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 32
-; RV64IM-NEXT:    mul a1, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a3
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a1, t0, 256
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a2, a0
-; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 512
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t4, a7, 39
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    and a1, t0, t1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 40
-; RV64IM-NEXT:    and a1, t0, a4
-; RV64IM-NEXT:    and a2, t0, s7
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 41
-; RV64IM-NEXT:    and a2, t0, s8
-; RV64IM-NEXT:    and a3, t0, s10
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    xor a2, a2, a3
-; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 48
-; RV64IM-NEXT:    and a3, t0, s11
-; RV64IM-NEXT:    and a4, t0, ra
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    mul a4, t0, a4
-; RV64IM-NEXT:    xor a3, a3, a4
-; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a3, a7, 49
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 56
-; RV64IM-NEXT:    and a1, t0, a2
-; RV64IM-NEXT:    and a2, t0, a3
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 57
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 42
-; RV64IM-NEXT:    slli ra, a7, 43
-; RV64IM-NEXT:    slli a3, a7, 44
-; RV64IM-NEXT:    slli a4, a7, 45
-; RV64IM-NEXT:    slli t5, a7, 46
-; RV64IM-NEXT:    slli s0, a7, 47
-; RV64IM-NEXT:    slli s1, a7, 50
-; RV64IM-NEXT:    slli a0, a7, 51
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 52
-; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 53
-; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 54
-; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 55
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 58
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 59
-; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 60
-; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 61
-; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a7, a7, 62
-; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, t3
-; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s7, 1
-; RV64IM-NEXT:    and a0, t0, s7
-; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s8, 8
-; RV64IM-NEXT:    and a0, t0, s8
-; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s3
-; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s6, 32
-; RV64IM-NEXT:    and a0, t0, s6
-; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s10, 64
-; RV64IM-NEXT:    and a0, t0, s10
-; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s11, 512
-; RV64IM-NEXT:    and a0, t0, s11
-; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s4, 1024
-; RV64IM-NEXT:    and a0, t0, s4
-; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s5, 2048
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s9, 16384
-; RV64IM-NEXT:    and a0, t0, s9
-; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui a5, 32768
-; RV64IM-NEXT:    and a5, t0, a5
-; RV64IM-NEXT:    lui a6, 65536
-; RV64IM-NEXT:    and a6, t0, a6
-; RV64IM-NEXT:    lui t1, 131072
-; RV64IM-NEXT:    and t1, t0, t1
-; RV64IM-NEXT:    lui t2, 262144
-; RV64IM-NEXT:    and t2, t0, t2
-; RV64IM-NEXT:    and a0, t0, t6
-; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s2
-; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, t4
-; RV64IM-NEXT:    and a7, t0, a2
-; RV64IM-NEXT:    and ra, t0, ra
-; RV64IM-NEXT:    and t3, t0, a3
-; RV64IM-NEXT:    and t4, t0, a4
-; RV64IM-NEXT:    and t5, t0, t5
-; RV64IM-NEXT:    and t6, t0, s0
-; RV64IM-NEXT:    and s0, t0, s1
-; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s1, t0, a2
-; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s2, t0, a2
-; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s3, t0, a2
-; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s4, t0, a2
-; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s5, t0, a2
-; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s6, t0, a2
-; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s7, t0, a2
-; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s8, t0, a2
-; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s9, t0, a2
-; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s10, t0, a2
-; RV64IM-NEXT:    andi s11, t0, 64
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 128
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 1024
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul s11, t0, a2
-; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a4, t0, a2
-; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a3, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, a6
-; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t1
-; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t2
-; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srliw t2, t0, 31
-; RV64IM-NEXT:    slli t2, t2, 31
-; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a5, t0, a5
-; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul t1, t0, a6
-; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a6, t0, a6
-; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a1
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a7, t0, a7
-; RV64IM-NEXT:    mul ra, t0, ra
-; RV64IM-NEXT:    mul a6, t0, t3
-; RV64IM-NEXT:    mul t4, t0, t4
-; RV64IM-NEXT:    mul t5, t0, t5
-; RV64IM-NEXT:    mul a0, t0, t6
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul t6, t0, s0
-; RV64IM-NEXT:    mul s0, t0, s1
-; RV64IM-NEXT:    mul s1, t0, s2
-; RV64IM-NEXT:    mul s2, t0, s3
-; RV64IM-NEXT:    mul s3, t0, s4
-; RV64IM-NEXT:    mul s4, t0, s5
-; RV64IM-NEXT:    mul s5, t0, s6
-; RV64IM-NEXT:    mul s6, t0, s7
-; RV64IM-NEXT:    mul s7, t0, s8
-; RV64IM-NEXT:    mul s8, t0, s9
-; RV64IM-NEXT:    mul s9, t0, s10
-; RV64IM-NEXT:    srli s10, t0, 63
-; RV64IM-NEXT:    slli s10, s10, 63
-; RV64IM-NEXT:    mul t2, t0, t2
-; RV64IM-NEXT:    mul t0, t0, s10
-; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, a0, a1
-; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s11, t3, s11
-; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, t3, a4
-; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, t3, a3
-; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, t3, a2
-; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a7, t3, a7
-; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t6, t3, t6
-; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s5, t3, s5
-; RV64IM-NEXT:    xor a0, s10, a0
-; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, s11, t3
-; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, a5
-; RV64IM-NEXT:    xor a5, a7, ra
-; RV64IM-NEXT:    xor a7, t6, s0
-; RV64IM-NEXT:    xor t6, s5, s6
-; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, t3
-; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s0, s10, t3
-; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    xor a6, a7, s1
-; RV64IM-NEXT:    xor a7, t6, s7
-; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t1, s0, t1
-; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t3
-; RV64IM-NEXT:    xor a5, a5, t4
-; RV64IM-NEXT:    xor a6, a6, s2
-; RV64IM-NEXT:    xor a7, a7, s8
-; RV64IM-NEXT:    xor a1, a0, a1
-; RV64IM-NEXT:    xor a1, a1, t1
-; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t1
-; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, t5
-; RV64IM-NEXT:    xor a6, a6, s3
-; RV64IM-NEXT:    xor a7, a7, s9
-; RV64IM-NEXT:    xor a1, a1, a4
-; RV64IM-NEXT:    xor a3, a3, t2
-; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, a4
-; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a5, a4
-; RV64IM-NEXT:    xor a5, a6, s4
-; RV64IM-NEXT:    slli a0, a0, 56
-; RV64IM-NEXT:    xor a6, a7, t0
-; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a7, a1, t0
-; RV64IM-NEXT:    xor a1, a1, a3
-; RV64IM-NEXT:    slli a7, a7, 40
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    or a0, a0, a7
-; RV64IM-NEXT:    lui a7, 4080
-; RV64IM-NEXT:    and a2, a1, a7
-; RV64IM-NEXT:    xor a4, a1, a4
-; RV64IM-NEXT:    srli a1, a1, 8
-; RV64IM-NEXT:    slli a2, a2, 24
-; RV64IM-NEXT:    xor a5, a4, a5
-; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a1, a1, a3
-; RV64IM-NEXT:    srli a4, a4, 24
-; RV64IM-NEXT:    srliw a3, a5, 24
-; RV64IM-NEXT:    and a4, a4, a7
-; RV64IM-NEXT:    srli a7, a5, 40
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    slli a3, a3, 32
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    and a4, a7, t0
-; RV64IM-NEXT:    srli a5, a5, 56
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a4, a4, a5
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    addi sp, sp, 448
-; RV64IM-NEXT:    ret
-  %res = call i4 @llvm.clmulr.i4(i4 %a, i4 %b)
-  ret i4 %res
-}
-
-define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
-; RV32IM-LABEL: clmulr_i8:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    addi sp, sp, -144
-; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    srli a3, a0, 8
-; RV32IM-NEXT:    lui s9, 16
-; RV32IM-NEXT:    srli a4, a0, 24
-; RV32IM-NEXT:    slli a2, a0, 24
-; RV32IM-NEXT:    lui a7, 61681
-; RV32IM-NEXT:    lui ra, 209715
-; RV32IM-NEXT:    lui a1, 349525
-; RV32IM-NEXT:    li s0, 1
-; RV32IM-NEXT:    lui t1, 1
-; RV32IM-NEXT:    lui t2, 2
-; RV32IM-NEXT:    lui t3, 4
-; RV32IM-NEXT:    lui t4, 8
-; RV32IM-NEXT:    lui t0, 32
-; RV32IM-NEXT:    lui a6, 64
-; RV32IM-NEXT:    lui a5, 128
-; RV32IM-NEXT:    lui s1, 256
-; RV32IM-NEXT:    lui t5, 512
-; RV32IM-NEXT:    lui t6, 1024
-; RV32IM-NEXT:    lui s4, 2048
-; RV32IM-NEXT:    lui s2, 4096
-; RV32IM-NEXT:    lui s3, 8192
-; RV32IM-NEXT:    lui s7, 16384
-; RV32IM-NEXT:    lui s5, 32768
-; RV32IM-NEXT:    lui s6, 65536
-; RV32IM-NEXT:    lui s11, 131072
-; RV32IM-NEXT:    lui s8, 262144
-; RV32IM-NEXT:    addi s10, s9, -256
-; RV32IM-NEXT:    and a3, a3, s10
-; RV32IM-NEXT:    or a3, a3, a4
-; RV32IM-NEXT:    addi a7, a7, -241
-; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a4, ra, 819
-; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a1, a1, 1365
-; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    slli s0, s0, 11
-; RV32IM-NEXT:    and a0, a0, s10
-; RV32IM-NEXT:    slli a0, a0, 8
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    srli a2, a0, 4
-; RV32IM-NEXT:    and a0, a0, a7
-; RV32IM-NEXT:    and a2, a2, a7
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 2
-; RV32IM-NEXT:    and a0, a0, a4
-; RV32IM-NEXT:    and a2, a2, a4
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 1
-; RV32IM-NEXT:    and a0, a0, a1
-; RV32IM-NEXT:    and a2, a2, a1
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a3, a2, a0
-; RV32IM-NEXT:    andi a0, a3, 2
-; RV32IM-NEXT:    andi a1, a3, 1
-; RV32IM-NEXT:    and a4, a3, s0
-; RV32IM-NEXT:    and a7, a3, t1
-; RV32IM-NEXT:    and s0, a3, t2
-; RV32IM-NEXT:    and ra, a3, t3
-; RV32IM-NEXT:    and a2, a3, t4
-; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s9
-; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t0
-; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a6, a3, a6
-; RV32IM-NEXT:    and a5, a3, a5
-; RV32IM-NEXT:    and s1, a3, s1
-; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t5
-; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and t6, a3, t6
-; RV32IM-NEXT:    and a2, a3, s4
-; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and s2, a3, s2
-; RV32IM-NEXT:    and a2, a3, s3
-; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s7
-; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s5
-; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s6
-; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s11
-; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s8
-; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a2, 524288
-; RV32IM-NEXT:    and a2, a3, a2
-; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a0, a3, 4
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a1, a3, 8
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a2, a3, 16
-; RV32IM-NEXT:    mul s9, a3, a2
-; RV32IM-NEXT:    andi t0, a3, 32
-; RV32IM-NEXT:    mul s6, a3, t0
-; RV32IM-NEXT:    andi t1, a3, 64
-; RV32IM-NEXT:    mul a0, a3, t1
-; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 128
-; RV32IM-NEXT:    mul a0, a3, t2
-; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 256
-; RV32IM-NEXT:    mul s1, a3, t2
-; RV32IM-NEXT:    andi t3, a3, 512
-; RV32IM-NEXT:    mul t5, a3, t3
-; RV32IM-NEXT:    andi t4, a3, 1024
-; RV32IM-NEXT:    mul s5, a3, t4
-; RV32IM-NEXT:    mul s8, a3, a4
-; RV32IM-NEXT:    mul a0, a3, a7
-; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul t2, a3, s0
-; RV32IM-NEXT:    mul a7, a3, ra
-; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s0, a3, a0
-; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s4, a3, a0
-; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s7, a3, a0
-; RV32IM-NEXT:    mul a0, a3, a6
-; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a6, a3, a5
-; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a3, a0
-; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t1, a3, a0
-; RV32IM-NEXT:    mul t4, a3, t6
-; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s3, a3, a0
-; RV32IM-NEXT:    mul a2, a3, s2
-; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a1, a3, a0
-; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a5, a3, a0
-; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t0, a3, a0
-; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t3, a3, a0
-; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t6, a3, a0
-; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s2, a3, a0
-; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a3, a3, a0
-; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, s11, a0
-; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s11, s11, ra
-; RV32IM-NEXT:    xor s6, s9, s6
-; RV32IM-NEXT:    xor t5, s1, t5
-; RV32IM-NEXT:    xor a7, t2, a7
-; RV32IM-NEXT:    xor a4, a6, a4
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    xor a0, a0, s11
-; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, s6, a2
-; RV32IM-NEXT:    xor a6, t5, s5
-; RV32IM-NEXT:    xor a7, a7, s0
-; RV32IM-NEXT:    xor a4, a4, t1
-; RV32IM-NEXT:    xor a1, a1, a5
-; RV32IM-NEXT:    xor a0, a0, a2
-; RV32IM-NEXT:    xor a2, a6, s8
-; RV32IM-NEXT:    xor a5, a7, s4
-; RV32IM-NEXT:    xor a4, a4, t4
-; RV32IM-NEXT:    xor a1, a1, t0
-; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, a0, a6
-; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a6
-; RV32IM-NEXT:    xor a5, a5, s7
-; RV32IM-NEXT:    xor a4, a4, s3
-; RV32IM-NEXT:    xor a1, a1, t3
-; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, a5, a6
-; RV32IM-NEXT:    xor a1, a1, t6
-; RV32IM-NEXT:    xor a2, a0, a2
-; RV32IM-NEXT:    xor a2, a2, a5
-; RV32IM-NEXT:    slli a0, a0, 24
-; RV32IM-NEXT:    xor a1, a1, s2
-; RV32IM-NEXT:    xor a2, a2, a4
-; RV32IM-NEXT:    xor a1, a1, a3
-; RV32IM-NEXT:    and a3, a2, s10
-; RV32IM-NEXT:    srli a4, a2, 8
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    slli a3, a3, 8
-; RV32IM-NEXT:    and a2, a4, s10
-; RV32IM-NEXT:    srli a1, a1, 24
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    or a0, a0, a1
-; RV32IM-NEXT:    srli a1, a0, 4
-; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 2
-; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 1
-; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    addi sp, sp, 144
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmulr_i8:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    addi sp, sp, -448
-; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srli a2, a0, 24
-; RV64IM-NEXT:    srli a6, a0, 8
-; RV64IM-NEXT:    li a3, 255
-; RV64IM-NEXT:    srli a5, a0, 40
-; RV64IM-NEXT:    lui s3, 16
-; RV64IM-NEXT:    srli s0, a0, 56
-; RV64IM-NEXT:    srliw t2, a0, 24
-; RV64IM-NEXT:    slli t0, a0, 56
-; RV64IM-NEXT:    lui t3, 61681
-; RV64IM-NEXT:    lui t4, 209715
-; RV64IM-NEXT:    lui t6, 349525
-; RV64IM-NEXT:    li a7, 1
-; RV64IM-NEXT:    lui s5, 2
-; RV64IM-NEXT:    lui t1, 4
-; RV64IM-NEXT:    lui a4, 128
-; RV64IM-NEXT:    lui s7, 256
-; RV64IM-NEXT:    lui s8, 4096
-; RV64IM-NEXT:    lui s10, 8192
-; RV64IM-NEXT:    lui a1, 4080
-; RV64IM-NEXT:    and a2, a2, a1
-; RV64IM-NEXT:    slli a3, a3, 24
-; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    addi s1, s3, -256
-; RV64IM-NEXT:    and t5, a0, a1
-; RV64IM-NEXT:    slli a1, t2, 32
-; RV64IM-NEXT:    addi s9, t3, -241
-; RV64IM-NEXT:    addi t4, t4, 819
-; RV64IM-NEXT:    addi t2, t6, 1365
-; RV64IM-NEXT:    slli t3, a7, 11
-; RV64IM-NEXT:    slli s11, a7, 32
-; RV64IM-NEXT:    slli ra, a7, 33
-; RV64IM-NEXT:    slli t6, a7, 34
-; RV64IM-NEXT:    slli s2, a7, 35
-; RV64IM-NEXT:    slli s4, a7, 36
-; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a6, a3
-; RV64IM-NEXT:    or a2, a3, a2
-; RV64IM-NEXT:    slli a3, a7, 37
-; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a5, s1
-; RV64IM-NEXT:    or a3, a3, s0
-; RV64IM-NEXT:    slli a5, a7, 38
-; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t5, t5, 24
-; RV64IM-NEXT:    and a0, a0, s1
-; RV64IM-NEXT:    or a1, t5, a1
-; RV64IM-NEXT:    slli a5, s9, 32
-; RV64IM-NEXT:    add a5, s9, a5
-; RV64IM-NEXT:    slli s0, t4, 32
-; RV64IM-NEXT:    add t4, t4, s0
-; RV64IM-NEXT:    slli s4, t2, 32
-; RV64IM-NEXT:    slli a0, a0, 40
-; RV64IM-NEXT:    add t2, t2, s4
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a0, t0, a0
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, a5
-; RV64IM-NEXT:    and a1, a1, a5
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t4
-; RV64IM-NEXT:    and a1, a1, t4
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t2
-; RV64IM-NEXT:    and a1, a1, t2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or t0, a1, a0
-; RV64IM-NEXT:    andi a0, t0, 2
-; RV64IM-NEXT:    andi a1, t0, 1
-; RV64IM-NEXT:    andi a2, t0, 4
-; RV64IM-NEXT:    andi a3, t0, 8
-; RV64IM-NEXT:    andi a5, t0, 16
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 32
-; RV64IM-NEXT:    mul a1, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a3
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a1, t0, 256
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a2, a0
-; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 512
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t4, a7, 39
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    and a1, t0, t1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 40
-; RV64IM-NEXT:    and a1, t0, a4
-; RV64IM-NEXT:    and a2, t0, s7
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 41
-; RV64IM-NEXT:    and a2, t0, s8
-; RV64IM-NEXT:    and a3, t0, s10
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    xor a2, a2, a3
-; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 48
-; RV64IM-NEXT:    and a3, t0, s11
-; RV64IM-NEXT:    and a4, t0, ra
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    mul a4, t0, a4
-; RV64IM-NEXT:    xor a3, a3, a4
-; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a3, a7, 49
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 56
-; RV64IM-NEXT:    and a1, t0, a2
-; RV64IM-NEXT:    and a2, t0, a3
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 57
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 42
-; RV64IM-NEXT:    slli ra, a7, 43
-; RV64IM-NEXT:    slli a3, a7, 44
-; RV64IM-NEXT:    slli a4, a7, 45
-; RV64IM-NEXT:    slli t5, a7, 46
-; RV64IM-NEXT:    slli s0, a7, 47
-; RV64IM-NEXT:    slli s1, a7, 50
-; RV64IM-NEXT:    slli a0, a7, 51
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 52
-; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 53
-; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 54
-; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 55
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 58
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 59
-; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 60
-; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 61
-; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a7, a7, 62
-; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, t3
-; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s7, 1
-; RV64IM-NEXT:    and a0, t0, s7
-; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s8, 8
-; RV64IM-NEXT:    and a0, t0, s8
-; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s3
-; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s6, 32
-; RV64IM-NEXT:    and a0, t0, s6
-; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s10, 64
-; RV64IM-NEXT:    and a0, t0, s10
-; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s11, 512
-; RV64IM-NEXT:    and a0, t0, s11
-; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s4, 1024
-; RV64IM-NEXT:    and a0, t0, s4
-; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s5, 2048
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s9, 16384
-; RV64IM-NEXT:    and a0, t0, s9
-; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui a5, 32768
-; RV64IM-NEXT:    and a5, t0, a5
-; RV64IM-NEXT:    lui a6, 65536
-; RV64IM-NEXT:    and a6, t0, a6
-; RV64IM-NEXT:    lui t1, 131072
-; RV64IM-NEXT:    and t1, t0, t1
-; RV64IM-NEXT:    lui t2, 262144
-; RV64IM-NEXT:    and t2, t0, t2
-; RV64IM-NEXT:    and a0, t0, t6
-; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s2
-; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, t4
-; RV64IM-NEXT:    and a7, t0, a2
-; RV64IM-NEXT:    and ra, t0, ra
-; RV64IM-NEXT:    and t3, t0, a3
-; RV64IM-NEXT:    and t4, t0, a4
-; RV64IM-NEXT:    and t5, t0, t5
-; RV64IM-NEXT:    and t6, t0, s0
-; RV64IM-NEXT:    and s0, t0, s1
-; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s1, t0, a2
-; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s2, t0, a2
-; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s3, t0, a2
-; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s4, t0, a2
-; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s5, t0, a2
-; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s6, t0, a2
-; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s7, t0, a2
-; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s8, t0, a2
-; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s9, t0, a2
-; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s10, t0, a2
-; RV64IM-NEXT:    andi s11, t0, 64
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 128
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 1024
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul s11, t0, a2
-; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a4, t0, a2
-; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a3, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, a6
-; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t1
-; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t2
-; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srliw t2, t0, 31
-; RV64IM-NEXT:    slli t2, t2, 31
-; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a5, t0, a5
-; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul t1, t0, a6
-; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a6, t0, a6
-; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a1
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a7, t0, a7
-; RV64IM-NEXT:    mul ra, t0, ra
-; RV64IM-NEXT:    mul a6, t0, t3
-; RV64IM-NEXT:    mul t4, t0, t4
-; RV64IM-NEXT:    mul t5, t0, t5
-; RV64IM-NEXT:    mul a0, t0, t6
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul t6, t0, s0
-; RV64IM-NEXT:    mul s0, t0, s1
-; RV64IM-NEXT:    mul s1, t0, s2
-; RV64IM-NEXT:    mul s2, t0, s3
-; RV64IM-NEXT:    mul s3, t0, s4
-; RV64IM-NEXT:    mul s4, t0, s5
-; RV64IM-NEXT:    mul s5, t0, s6
-; RV64IM-NEXT:    mul s6, t0, s7
-; RV64IM-NEXT:    mul s7, t0, s8
-; RV64IM-NEXT:    mul s8, t0, s9
-; RV64IM-NEXT:    mul s9, t0, s10
-; RV64IM-NEXT:    srli s10, t0, 63
-; RV64IM-NEXT:    slli s10, s10, 63
-; RV64IM-NEXT:    mul t2, t0, t2
-; RV64IM-NEXT:    mul t0, t0, s10
-; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, a0, a1
-; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s11, t3, s11
-; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, t3, a4
-; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, t3, a3
-; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, t3, a2
-; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a7, t3, a7
-; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t6, t3, t6
-; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s5, t3, s5
-; RV64IM-NEXT:    xor a0, s10, a0
-; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, s11, t3
-; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, a5
-; RV64IM-NEXT:    xor a5, a7, ra
-; RV64IM-NEXT:    xor a7, t6, s0
-; RV64IM-NEXT:    xor t6, s5, s6
-; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, t3
-; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s0, s10, t3
-; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    xor a6, a7, s1
-; RV64IM-NEXT:    xor a7, t6, s7
-; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t1, s0, t1
-; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t3
-; RV64IM-NEXT:    xor a5, a5, t4
-; RV64IM-NEXT:    xor a6, a6, s2
-; RV64IM-NEXT:    xor a7, a7, s8
-; RV64IM-NEXT:    xor a1, a0, a1
-; RV64IM-NEXT:    xor a1, a1, t1
-; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t1
-; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, t5
-; RV64IM-NEXT:    xor a6, a6, s3
-; RV64IM-NEXT:    xor a7, a7, s9
-; RV64IM-NEXT:    xor a1, a1, a4
-; RV64IM-NEXT:    xor a3, a3, t2
-; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, a4
-; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a5, a4
-; RV64IM-NEXT:    xor a5, a6, s4
-; RV64IM-NEXT:    slli a0, a0, 56
-; RV64IM-NEXT:    xor a6, a7, t0
-; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a7, a1, t0
-; RV64IM-NEXT:    xor a1, a1, a3
-; RV64IM-NEXT:    slli a7, a7, 40
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    or a0, a0, a7
-; RV64IM-NEXT:    lui a7, 4080
-; RV64IM-NEXT:    and a2, a1, a7
-; RV64IM-NEXT:    xor a4, a1, a4
-; RV64IM-NEXT:    srli a1, a1, 8
-; RV64IM-NEXT:    slli a2, a2, 24
-; RV64IM-NEXT:    xor a5, a4, a5
-; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a1, a1, a3
-; RV64IM-NEXT:    srli a4, a4, 24
-; RV64IM-NEXT:    srliw a3, a5, 24
-; RV64IM-NEXT:    and a4, a4, a7
-; RV64IM-NEXT:    srli a7, a5, 40
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    slli a3, a3, 32
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    and a4, a7, t0
-; RV64IM-NEXT:    srli a5, a5, 56
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a4, a4, a5
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    addi sp, sp, 448
-; RV64IM-NEXT:    ret
-  %res = call i8 @llvm.clmulr.i8(i8 %a, i8 %b)
-  ret i8 %res
-}
-
-define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
-; RV32IM-LABEL: clmulr_i16:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    addi sp, sp, -144
-; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    srli a3, a0, 8
-; RV32IM-NEXT:    lui s9, 16
-; RV32IM-NEXT:    srli a4, a0, 24
-; RV32IM-NEXT:    slli a2, a0, 24
-; RV32IM-NEXT:    lui a7, 61681
-; RV32IM-NEXT:    lui ra, 209715
-; RV32IM-NEXT:    lui a1, 349525
-; RV32IM-NEXT:    li s0, 1
-; RV32IM-NEXT:    lui t1, 1
-; RV32IM-NEXT:    lui t2, 2
-; RV32IM-NEXT:    lui t3, 4
-; RV32IM-NEXT:    lui t4, 8
-; RV32IM-NEXT:    lui t0, 32
-; RV32IM-NEXT:    lui a6, 64
-; RV32IM-NEXT:    lui a5, 128
-; RV32IM-NEXT:    lui s1, 256
-; RV32IM-NEXT:    lui t5, 512
-; RV32IM-NEXT:    lui t6, 1024
-; RV32IM-NEXT:    lui s4, 2048
-; RV32IM-NEXT:    lui s2, 4096
-; RV32IM-NEXT:    lui s3, 8192
-; RV32IM-NEXT:    lui s7, 16384
-; RV32IM-NEXT:    lui s5, 32768
-; RV32IM-NEXT:    lui s6, 65536
-; RV32IM-NEXT:    lui s11, 131072
-; RV32IM-NEXT:    lui s8, 262144
-; RV32IM-NEXT:    addi s10, s9, -256
-; RV32IM-NEXT:    and a3, a3, s10
-; RV32IM-NEXT:    or a3, a3, a4
-; RV32IM-NEXT:    addi a7, a7, -241
-; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a4, ra, 819
-; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a1, a1, 1365
-; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    slli s0, s0, 11
-; RV32IM-NEXT:    and a0, a0, s10
-; RV32IM-NEXT:    slli a0, a0, 8
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    srli a2, a0, 4
-; RV32IM-NEXT:    and a0, a0, a7
-; RV32IM-NEXT:    and a2, a2, a7
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 2
-; RV32IM-NEXT:    and a0, a0, a4
-; RV32IM-NEXT:    and a2, a2, a4
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 1
-; RV32IM-NEXT:    and a0, a0, a1
-; RV32IM-NEXT:    and a2, a2, a1
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a3, a2, a0
-; RV32IM-NEXT:    andi a0, a3, 2
-; RV32IM-NEXT:    andi a1, a3, 1
-; RV32IM-NEXT:    and a4, a3, s0
-; RV32IM-NEXT:    and a7, a3, t1
-; RV32IM-NEXT:    and s0, a3, t2
-; RV32IM-NEXT:    and ra, a3, t3
-; RV32IM-NEXT:    and a2, a3, t4
-; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s9
-; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t0
-; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a6, a3, a6
-; RV32IM-NEXT:    and a5, a3, a5
-; RV32IM-NEXT:    and s1, a3, s1
-; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t5
-; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and t6, a3, t6
-; RV32IM-NEXT:    and a2, a3, s4
-; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and s2, a3, s2
-; RV32IM-NEXT:    and a2, a3, s3
-; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s7
-; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s5
-; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s6
-; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s11
-; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s8
-; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a2, 524288
-; RV32IM-NEXT:    and a2, a3, a2
-; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a0, a3, 4
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a1, a3, 8
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a2, a3, 16
-; RV32IM-NEXT:    mul s9, a3, a2
-; RV32IM-NEXT:    andi t0, a3, 32
-; RV32IM-NEXT:    mul s6, a3, t0
-; RV32IM-NEXT:    andi t1, a3, 64
-; RV32IM-NEXT:    mul a0, a3, t1
-; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 128
-; RV32IM-NEXT:    mul a0, a3, t2
-; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 256
-; RV32IM-NEXT:    mul s1, a3, t2
-; RV32IM-NEXT:    andi t3, a3, 512
-; RV32IM-NEXT:    mul t5, a3, t3
-; RV32IM-NEXT:    andi t4, a3, 1024
-; RV32IM-NEXT:    mul s5, a3, t4
-; RV32IM-NEXT:    mul s8, a3, a4
-; RV32IM-NEXT:    mul a0, a3, a7
-; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul t2, a3, s0
-; RV32IM-NEXT:    mul a7, a3, ra
-; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s0, a3, a0
-; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s4, a3, a0
-; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s7, a3, a0
-; RV32IM-NEXT:    mul a0, a3, a6
-; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a6, a3, a5
-; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a3, a0
-; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t1, a3, a0
-; RV32IM-NEXT:    mul t4, a3, t6
-; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s3, a3, a0
-; RV32IM-NEXT:    mul a2, a3, s2
-; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a1, a3, a0
-; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a5, a3, a0
-; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t0, a3, a0
-; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t3, a3, a0
-; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t6, a3, a0
-; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s2, a3, a0
-; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a3, a3, a0
-; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, s11, a0
-; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s11, s11, ra
-; RV32IM-NEXT:    xor s6, s9, s6
-; RV32IM-NEXT:    xor t5, s1, t5
-; RV32IM-NEXT:    xor a7, t2, a7
-; RV32IM-NEXT:    xor a4, a6, a4
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    xor a0, a0, s11
-; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, s6, a2
-; RV32IM-NEXT:    xor a6, t5, s5
-; RV32IM-NEXT:    xor a7, a7, s0
-; RV32IM-NEXT:    xor a4, a4, t1
-; RV32IM-NEXT:    xor a1, a1, a5
-; RV32IM-NEXT:    xor a0, a0, a2
-; RV32IM-NEXT:    xor a2, a6, s8
-; RV32IM-NEXT:    xor a5, a7, s4
-; RV32IM-NEXT:    xor a4, a4, t4
-; RV32IM-NEXT:    xor a1, a1, t0
-; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, a0, a6
-; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a6
-; RV32IM-NEXT:    xor a5, a5, s7
-; RV32IM-NEXT:    xor a4, a4, s3
-; RV32IM-NEXT:    xor a1, a1, t3
-; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, a5, a6
-; RV32IM-NEXT:    xor a1, a1, t6
-; RV32IM-NEXT:    xor a2, a0, a2
-; RV32IM-NEXT:    xor a2, a2, a5
-; RV32IM-NEXT:    slli a0, a0, 24
-; RV32IM-NEXT:    xor a1, a1, s2
-; RV32IM-NEXT:    xor a2, a2, a4
-; RV32IM-NEXT:    xor a1, a1, a3
-; RV32IM-NEXT:    and a3, a2, s10
-; RV32IM-NEXT:    srli a4, a2, 8
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    slli a3, a3, 8
-; RV32IM-NEXT:    and a2, a4, s10
-; RV32IM-NEXT:    srli a1, a1, 24
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    or a0, a0, a1
-; RV32IM-NEXT:    srli a1, a0, 4
-; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 2
-; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 1
-; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    addi sp, sp, 144
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmulr_i16:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    addi sp, sp, -448
-; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srli a2, a0, 24
-; RV64IM-NEXT:    srli a6, a0, 8
-; RV64IM-NEXT:    li a3, 255
-; RV64IM-NEXT:    srli a5, a0, 40
-; RV64IM-NEXT:    lui s3, 16
-; RV64IM-NEXT:    srli s0, a0, 56
-; RV64IM-NEXT:    srliw t2, a0, 24
-; RV64IM-NEXT:    slli t0, a0, 56
-; RV64IM-NEXT:    lui t3, 61681
-; RV64IM-NEXT:    lui t4, 209715
-; RV64IM-NEXT:    lui t6, 349525
-; RV64IM-NEXT:    li a7, 1
-; RV64IM-NEXT:    lui s5, 2
-; RV64IM-NEXT:    lui t1, 4
-; RV64IM-NEXT:    lui a4, 128
-; RV64IM-NEXT:    lui s7, 256
-; RV64IM-NEXT:    lui s8, 4096
-; RV64IM-NEXT:    lui s10, 8192
-; RV64IM-NEXT:    lui a1, 4080
-; RV64IM-NEXT:    and a2, a2, a1
-; RV64IM-NEXT:    slli a3, a3, 24
-; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    addi s1, s3, -256
-; RV64IM-NEXT:    and t5, a0, a1
-; RV64IM-NEXT:    slli a1, t2, 32
-; RV64IM-NEXT:    addi s9, t3, -241
-; RV64IM-NEXT:    addi t4, t4, 819
-; RV64IM-NEXT:    addi t2, t6, 1365
-; RV64IM-NEXT:    slli t3, a7, 11
-; RV64IM-NEXT:    slli s11, a7, 32
-; RV64IM-NEXT:    slli ra, a7, 33
-; RV64IM-NEXT:    slli t6, a7, 34
-; RV64IM-NEXT:    slli s2, a7, 35
-; RV64IM-NEXT:    slli s4, a7, 36
-; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a6, a3
-; RV64IM-NEXT:    or a2, a3, a2
-; RV64IM-NEXT:    slli a3, a7, 37
-; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a5, s1
-; RV64IM-NEXT:    or a3, a3, s0
-; RV64IM-NEXT:    slli a5, a7, 38
-; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t5, t5, 24
-; RV64IM-NEXT:    and a0, a0, s1
-; RV64IM-NEXT:    or a1, t5, a1
-; RV64IM-NEXT:    slli a5, s9, 32
-; RV64IM-NEXT:    add a5, s9, a5
-; RV64IM-NEXT:    slli s0, t4, 32
-; RV64IM-NEXT:    add t4, t4, s0
-; RV64IM-NEXT:    slli s4, t2, 32
-; RV64IM-NEXT:    slli a0, a0, 40
-; RV64IM-NEXT:    add t2, t2, s4
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a0, t0, a0
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, a5
-; RV64IM-NEXT:    and a1, a1, a5
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t4
-; RV64IM-NEXT:    and a1, a1, t4
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t2
-; RV64IM-NEXT:    and a1, a1, t2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or t0, a1, a0
-; RV64IM-NEXT:    andi a0, t0, 2
-; RV64IM-NEXT:    andi a1, t0, 1
-; RV64IM-NEXT:    andi a2, t0, 4
-; RV64IM-NEXT:    andi a3, t0, 8
-; RV64IM-NEXT:    andi a5, t0, 16
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 32
-; RV64IM-NEXT:    mul a1, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a3
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a1, t0, 256
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a2, a0
-; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 512
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t4, a7, 39
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    and a1, t0, t1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 40
-; RV64IM-NEXT:    and a1, t0, a4
-; RV64IM-NEXT:    and a2, t0, s7
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 41
-; RV64IM-NEXT:    and a2, t0, s8
-; RV64IM-NEXT:    and a3, t0, s10
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    xor a2, a2, a3
-; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 48
-; RV64IM-NEXT:    and a3, t0, s11
-; RV64IM-NEXT:    and a4, t0, ra
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    mul a4, t0, a4
-; RV64IM-NEXT:    xor a3, a3, a4
-; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a3, a7, 49
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 56
-; RV64IM-NEXT:    and a1, t0, a2
-; RV64IM-NEXT:    and a2, t0, a3
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 57
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 42
-; RV64IM-NEXT:    slli ra, a7, 43
-; RV64IM-NEXT:    slli a3, a7, 44
-; RV64IM-NEXT:    slli a4, a7, 45
-; RV64IM-NEXT:    slli t5, a7, 46
-; RV64IM-NEXT:    slli s0, a7, 47
-; RV64IM-NEXT:    slli s1, a7, 50
-; RV64IM-NEXT:    slli a0, a7, 51
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 52
-; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 53
-; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 54
-; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 55
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 58
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 59
-; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 60
-; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 61
-; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a7, a7, 62
-; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, t3
-; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s7, 1
-; RV64IM-NEXT:    and a0, t0, s7
-; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s8, 8
-; RV64IM-NEXT:    and a0, t0, s8
-; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s3
-; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s6, 32
-; RV64IM-NEXT:    and a0, t0, s6
-; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s10, 64
-; RV64IM-NEXT:    and a0, t0, s10
-; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s11, 512
-; RV64IM-NEXT:    and a0, t0, s11
-; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s4, 1024
-; RV64IM-NEXT:    and a0, t0, s4
-; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s5, 2048
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s9, 16384
-; RV64IM-NEXT:    and a0, t0, s9
-; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui a5, 32768
-; RV64IM-NEXT:    and a5, t0, a5
-; RV64IM-NEXT:    lui a6, 65536
-; RV64IM-NEXT:    and a6, t0, a6
-; RV64IM-NEXT:    lui t1, 131072
-; RV64IM-NEXT:    and t1, t0, t1
-; RV64IM-NEXT:    lui t2, 262144
-; RV64IM-NEXT:    and t2, t0, t2
-; RV64IM-NEXT:    and a0, t0, t6
-; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s2
-; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, t4
-; RV64IM-NEXT:    and a7, t0, a2
-; RV64IM-NEXT:    and ra, t0, ra
-; RV64IM-NEXT:    and t3, t0, a3
-; RV64IM-NEXT:    and t4, t0, a4
-; RV64IM-NEXT:    and t5, t0, t5
-; RV64IM-NEXT:    and t6, t0, s0
-; RV64IM-NEXT:    and s0, t0, s1
-; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s1, t0, a2
-; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s2, t0, a2
-; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s3, t0, a2
-; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s4, t0, a2
-; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s5, t0, a2
-; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s6, t0, a2
-; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s7, t0, a2
-; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s8, t0, a2
-; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s9, t0, a2
-; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s10, t0, a2
-; RV64IM-NEXT:    andi s11, t0, 64
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 128
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 1024
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul s11, t0, a2
-; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a4, t0, a2
-; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a3, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, a6
-; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t1
-; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t2
-; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srliw t2, t0, 31
-; RV64IM-NEXT:    slli t2, t2, 31
-; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a5, t0, a5
-; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul t1, t0, a6
-; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a6, t0, a6
-; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a1
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a7, t0, a7
-; RV64IM-NEXT:    mul ra, t0, ra
-; RV64IM-NEXT:    mul a6, t0, t3
-; RV64IM-NEXT:    mul t4, t0, t4
-; RV64IM-NEXT:    mul t5, t0, t5
-; RV64IM-NEXT:    mul a0, t0, t6
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul t6, t0, s0
-; RV64IM-NEXT:    mul s0, t0, s1
-; RV64IM-NEXT:    mul s1, t0, s2
-; RV64IM-NEXT:    mul s2, t0, s3
-; RV64IM-NEXT:    mul s3, t0, s4
-; RV64IM-NEXT:    mul s4, t0, s5
-; RV64IM-NEXT:    mul s5, t0, s6
-; RV64IM-NEXT:    mul s6, t0, s7
-; RV64IM-NEXT:    mul s7, t0, s8
-; RV64IM-NEXT:    mul s8, t0, s9
-; RV64IM-NEXT:    mul s9, t0, s10
-; RV64IM-NEXT:    srli s10, t0, 63
-; RV64IM-NEXT:    slli s10, s10, 63
-; RV64IM-NEXT:    mul t2, t0, t2
-; RV64IM-NEXT:    mul t0, t0, s10
-; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, a0, a1
-; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s11, t3, s11
-; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, t3, a4
-; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, t3, a3
-; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, t3, a2
-; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a7, t3, a7
-; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t6, t3, t6
-; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s5, t3, s5
-; RV64IM-NEXT:    xor a0, s10, a0
-; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, s11, t3
-; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, a5
-; RV64IM-NEXT:    xor a5, a7, ra
-; RV64IM-NEXT:    xor a7, t6, s0
-; RV64IM-NEXT:    xor t6, s5, s6
-; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, t3
-; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s0, s10, t3
-; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    xor a6, a7, s1
-; RV64IM-NEXT:    xor a7, t6, s7
-; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t1, s0, t1
-; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t3
-; RV64IM-NEXT:    xor a5, a5, t4
-; RV64IM-NEXT:    xor a6, a6, s2
-; RV64IM-NEXT:    xor a7, a7, s8
-; RV64IM-NEXT:    xor a1, a0, a1
-; RV64IM-NEXT:    xor a1, a1, t1
-; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t1
-; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, t5
-; RV64IM-NEXT:    xor a6, a6, s3
-; RV64IM-NEXT:    xor a7, a7, s9
-; RV64IM-NEXT:    xor a1, a1, a4
-; RV64IM-NEXT:    xor a3, a3, t2
-; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, a4
-; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a5, a4
-; RV64IM-NEXT:    xor a5, a6, s4
-; RV64IM-NEXT:    slli a0, a0, 56
-; RV64IM-NEXT:    xor a6, a7, t0
-; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a7, a1, t0
-; RV64IM-NEXT:    xor a1, a1, a3
-; RV64IM-NEXT:    slli a7, a7, 40
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    or a0, a0, a7
-; RV64IM-NEXT:    lui a7, 4080
-; RV64IM-NEXT:    and a2, a1, a7
-; RV64IM-NEXT:    xor a4, a1, a4
-; RV64IM-NEXT:    srli a1, a1, 8
-; RV64IM-NEXT:    slli a2, a2, 24
-; RV64IM-NEXT:    xor a5, a4, a5
-; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a1, a1, a3
-; RV64IM-NEXT:    srli a4, a4, 24
-; RV64IM-NEXT:    srliw a3, a5, 24
-; RV64IM-NEXT:    and a4, a4, a7
-; RV64IM-NEXT:    srli a7, a5, 40
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    slli a3, a3, 32
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    and a4, a7, t0
-; RV64IM-NEXT:    srli a5, a5, 56
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a4, a4, a5
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    addi sp, sp, 448
-; RV64IM-NEXT:    ret
-  %res = call i16 @llvm.clmulr.i16(i16 %a, i16 %b)
-  ret i16 %res
-}
-
-define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
-; RV32IM-LABEL: clmulr_i32:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    addi sp, sp, -144
-; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    srli a3, a0, 8
-; RV32IM-NEXT:    lui s9, 16
-; RV32IM-NEXT:    srli a4, a0, 24
-; RV32IM-NEXT:    slli a2, a0, 24
-; RV32IM-NEXT:    lui a7, 61681
-; RV32IM-NEXT:    lui ra, 209715
-; RV32IM-NEXT:    lui a1, 349525
-; RV32IM-NEXT:    li s0, 1
-; RV32IM-NEXT:    lui t1, 1
-; RV32IM-NEXT:    lui t2, 2
-; RV32IM-NEXT:    lui t3, 4
-; RV32IM-NEXT:    lui t4, 8
-; RV32IM-NEXT:    lui t0, 32
-; RV32IM-NEXT:    lui a6, 64
-; RV32IM-NEXT:    lui a5, 128
-; RV32IM-NEXT:    lui s1, 256
-; RV32IM-NEXT:    lui t5, 512
-; RV32IM-NEXT:    lui t6, 1024
-; RV32IM-NEXT:    lui s4, 2048
-; RV32IM-NEXT:    lui s2, 4096
-; RV32IM-NEXT:    lui s3, 8192
-; RV32IM-NEXT:    lui s7, 16384
-; RV32IM-NEXT:    lui s5, 32768
-; RV32IM-NEXT:    lui s6, 65536
-; RV32IM-NEXT:    lui s11, 131072
-; RV32IM-NEXT:    lui s8, 262144
-; RV32IM-NEXT:    addi s10, s9, -256
-; RV32IM-NEXT:    and a3, a3, s10
-; RV32IM-NEXT:    or a3, a3, a4
-; RV32IM-NEXT:    addi a7, a7, -241
-; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a4, ra, 819
-; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi a1, a1, 1365
-; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    slli s0, s0, 11
-; RV32IM-NEXT:    and a0, a0, s10
-; RV32IM-NEXT:    slli a0, a0, 8
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    srli a2, a0, 4
-; RV32IM-NEXT:    and a0, a0, a7
-; RV32IM-NEXT:    and a2, a2, a7
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 2
-; RV32IM-NEXT:    and a0, a0, a4
-; RV32IM-NEXT:    and a2, a2, a4
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a0, 1
-; RV32IM-NEXT:    and a0, a0, a1
-; RV32IM-NEXT:    and a2, a2, a1
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a3, a2, a0
-; RV32IM-NEXT:    andi a0, a3, 2
-; RV32IM-NEXT:    andi a1, a3, 1
-; RV32IM-NEXT:    and a4, a3, s0
-; RV32IM-NEXT:    and a7, a3, t1
-; RV32IM-NEXT:    and s0, a3, t2
-; RV32IM-NEXT:    and ra, a3, t3
-; RV32IM-NEXT:    and a2, a3, t4
-; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s9
-; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t0
-; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a6, a3, a6
-; RV32IM-NEXT:    and a5, a3, a5
-; RV32IM-NEXT:    and s1, a3, s1
-; RV32IM-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, t5
-; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and t6, a3, t6
-; RV32IM-NEXT:    and a2, a3, s4
-; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and s2, a3, s2
-; RV32IM-NEXT:    and a2, a3, s3
-; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s7
-; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s5
-; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s6
-; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s11
-; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a2, a3, s8
-; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a2, 524288
-; RV32IM-NEXT:    and a2, a3, a2
-; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a0, a3, 4
-; RV32IM-NEXT:    mul a0, a3, a0
-; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a1, a3, 8
-; RV32IM-NEXT:    mul a0, a3, a1
-; RV32IM-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a2, a3, 16
-; RV32IM-NEXT:    mul s9, a3, a2
-; RV32IM-NEXT:    andi t0, a3, 32
-; RV32IM-NEXT:    mul s6, a3, t0
-; RV32IM-NEXT:    andi t1, a3, 64
-; RV32IM-NEXT:    mul a0, a3, t1
-; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 128
-; RV32IM-NEXT:    mul a0, a3, t2
-; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t2, a3, 256
-; RV32IM-NEXT:    mul s1, a3, t2
-; RV32IM-NEXT:    andi t3, a3, 512
-; RV32IM-NEXT:    mul t5, a3, t3
-; RV32IM-NEXT:    andi t4, a3, 1024
-; RV32IM-NEXT:    mul s5, a3, t4
-; RV32IM-NEXT:    mul s8, a3, a4
-; RV32IM-NEXT:    mul a0, a3, a7
-; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul t2, a3, s0
-; RV32IM-NEXT:    mul a7, a3, ra
-; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s0, a3, a0
-; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s4, a3, a0
-; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s7, a3, a0
-; RV32IM-NEXT:    mul a0, a3, a6
-; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a6, a3, a5
-; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a3, a0
-; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t1, a3, a0
-; RV32IM-NEXT:    mul t4, a3, t6
-; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s3, a3, a0
-; RV32IM-NEXT:    mul a2, a3, s2
-; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a1, a3, a0
-; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a5, a3, a0
-; RV32IM-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t0, a3, a0
-; RV32IM-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t3, a3, a0
-; RV32IM-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul t6, a3, a0
-; RV32IM-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s2, a3, a0
-; RV32IM-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a3, a3, a0
-; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, s11, a0
-; RV32IM-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s11, s11, ra
-; RV32IM-NEXT:    xor s6, s9, s6
-; RV32IM-NEXT:    xor t5, s1, t5
-; RV32IM-NEXT:    xor a7, t2, a7
-; RV32IM-NEXT:    xor a4, a6, a4
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    xor a0, a0, s11
-; RV32IM-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, s6, a2
-; RV32IM-NEXT:    xor a6, t5, s5
-; RV32IM-NEXT:    xor a7, a7, s0
-; RV32IM-NEXT:    xor a4, a4, t1
-; RV32IM-NEXT:    xor a1, a1, a5
-; RV32IM-NEXT:    xor a0, a0, a2
-; RV32IM-NEXT:    xor a2, a6, s8
-; RV32IM-NEXT:    xor a5, a7, s4
-; RV32IM-NEXT:    xor a4, a4, t4
-; RV32IM-NEXT:    xor a1, a1, t0
-; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, a0, a6
-; RV32IM-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a6
-; RV32IM-NEXT:    xor a5, a5, s7
-; RV32IM-NEXT:    xor a4, a4, s3
-; RV32IM-NEXT:    xor a1, a1, t3
-; RV32IM-NEXT:    lw a6, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, a5, a6
-; RV32IM-NEXT:    xor a1, a1, t6
-; RV32IM-NEXT:    xor a2, a0, a2
-; RV32IM-NEXT:    xor a2, a2, a5
-; RV32IM-NEXT:    slli a0, a0, 24
-; RV32IM-NEXT:    xor a1, a1, s2
-; RV32IM-NEXT:    xor a2, a2, a4
-; RV32IM-NEXT:    xor a1, a1, a3
-; RV32IM-NEXT:    and a3, a2, s10
-; RV32IM-NEXT:    srli a4, a2, 8
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    slli a3, a3, 8
-; RV32IM-NEXT:    and a2, a4, s10
-; RV32IM-NEXT:    srli a1, a1, 24
-; RV32IM-NEXT:    or a0, a0, a3
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    or a0, a0, a1
-; RV32IM-NEXT:    srli a1, a0, 4
-; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 2
-; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    srli a1, a0, 1
-; RV32IM-NEXT:    lw a2, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a2
-; RV32IM-NEXT:    and a1, a1, a2
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    or a0, a1, a0
-; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    addi sp, sp, 144
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmulr_i32:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    addi sp, sp, -448
-; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srli a2, a0, 24
-; RV64IM-NEXT:    srli a6, a0, 8
-; RV64IM-NEXT:    li a3, 255
-; RV64IM-NEXT:    srli a5, a0, 40
-; RV64IM-NEXT:    lui s3, 16
-; RV64IM-NEXT:    srli s0, a0, 56
-; RV64IM-NEXT:    srliw t2, a0, 24
-; RV64IM-NEXT:    slli t0, a0, 56
-; RV64IM-NEXT:    lui t3, 61681
-; RV64IM-NEXT:    lui t4, 209715
-; RV64IM-NEXT:    lui t6, 349525
-; RV64IM-NEXT:    li a7, 1
-; RV64IM-NEXT:    lui s5, 2
-; RV64IM-NEXT:    lui t1, 4
-; RV64IM-NEXT:    lui a4, 128
-; RV64IM-NEXT:    lui s7, 256
-; RV64IM-NEXT:    lui s8, 4096
-; RV64IM-NEXT:    lui s10, 8192
-; RV64IM-NEXT:    lui a1, 4080
-; RV64IM-NEXT:    and a2, a2, a1
-; RV64IM-NEXT:    slli a3, a3, 24
-; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    addi s1, s3, -256
-; RV64IM-NEXT:    and t5, a0, a1
-; RV64IM-NEXT:    slli a1, t2, 32
-; RV64IM-NEXT:    addi s9, t3, -241
-; RV64IM-NEXT:    addi t4, t4, 819
-; RV64IM-NEXT:    addi t2, t6, 1365
-; RV64IM-NEXT:    slli t3, a7, 11
-; RV64IM-NEXT:    slli s11, a7, 32
-; RV64IM-NEXT:    slli ra, a7, 33
-; RV64IM-NEXT:    slli t6, a7, 34
-; RV64IM-NEXT:    slli s2, a7, 35
-; RV64IM-NEXT:    slli s4, a7, 36
-; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a6, a3
-; RV64IM-NEXT:    or a2, a3, a2
-; RV64IM-NEXT:    slli a3, a7, 37
-; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a5, s1
-; RV64IM-NEXT:    or a3, a3, s0
-; RV64IM-NEXT:    slli a5, a7, 38
-; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t5, t5, 24
-; RV64IM-NEXT:    and a0, a0, s1
-; RV64IM-NEXT:    or a1, t5, a1
-; RV64IM-NEXT:    slli a5, s9, 32
-; RV64IM-NEXT:    add a5, s9, a5
-; RV64IM-NEXT:    slli s0, t4, 32
-; RV64IM-NEXT:    add t4, t4, s0
-; RV64IM-NEXT:    slli s4, t2, 32
-; RV64IM-NEXT:    slli a0, a0, 40
-; RV64IM-NEXT:    add t2, t2, s4
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a0, t0, a0
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, a5
-; RV64IM-NEXT:    and a1, a1, a5
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t4
-; RV64IM-NEXT:    and a1, a1, t4
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t2
-; RV64IM-NEXT:    and a1, a1, t2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or t0, a1, a0
-; RV64IM-NEXT:    andi a0, t0, 2
-; RV64IM-NEXT:    andi a1, t0, 1
-; RV64IM-NEXT:    andi a2, t0, 4
-; RV64IM-NEXT:    andi a3, t0, 8
-; RV64IM-NEXT:    andi a5, t0, 16
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 32
-; RV64IM-NEXT:    mul a1, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a3
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a1, t0, 256
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a2, a0
-; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 512
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t4, a7, 39
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    and a1, t0, t1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 40
-; RV64IM-NEXT:    and a1, t0, a4
-; RV64IM-NEXT:    and a2, t0, s7
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 41
-; RV64IM-NEXT:    and a2, t0, s8
-; RV64IM-NEXT:    and a3, t0, s10
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    xor a2, a2, a3
-; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 48
-; RV64IM-NEXT:    and a3, t0, s11
-; RV64IM-NEXT:    and a4, t0, ra
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    mul a4, t0, a4
-; RV64IM-NEXT:    xor a3, a3, a4
-; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a3, a7, 49
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 56
-; RV64IM-NEXT:    and a1, t0, a2
-; RV64IM-NEXT:    and a2, t0, a3
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 57
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 42
-; RV64IM-NEXT:    slli ra, a7, 43
-; RV64IM-NEXT:    slli a3, a7, 44
-; RV64IM-NEXT:    slli a4, a7, 45
-; RV64IM-NEXT:    slli t5, a7, 46
-; RV64IM-NEXT:    slli s0, a7, 47
-; RV64IM-NEXT:    slli s1, a7, 50
-; RV64IM-NEXT:    slli a0, a7, 51
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 52
-; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 53
-; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 54
-; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 55
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 58
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 59
-; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 60
-; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 61
-; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a7, a7, 62
-; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, t3
-; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s7, 1
-; RV64IM-NEXT:    and a0, t0, s7
-; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s8, 8
-; RV64IM-NEXT:    and a0, t0, s8
-; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s3
-; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s6, 32
-; RV64IM-NEXT:    and a0, t0, s6
-; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s10, 64
-; RV64IM-NEXT:    and a0, t0, s10
-; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s11, 512
-; RV64IM-NEXT:    and a0, t0, s11
-; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s4, 1024
-; RV64IM-NEXT:    and a0, t0, s4
-; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s5, 2048
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s9, 16384
-; RV64IM-NEXT:    and a0, t0, s9
-; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui a5, 32768
-; RV64IM-NEXT:    and a5, t0, a5
-; RV64IM-NEXT:    lui a6, 65536
-; RV64IM-NEXT:    and a6, t0, a6
-; RV64IM-NEXT:    lui t1, 131072
-; RV64IM-NEXT:    and t1, t0, t1
-; RV64IM-NEXT:    lui t2, 262144
-; RV64IM-NEXT:    and t2, t0, t2
-; RV64IM-NEXT:    and a0, t0, t6
-; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s2
-; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, t4
-; RV64IM-NEXT:    and a7, t0, a2
-; RV64IM-NEXT:    and ra, t0, ra
-; RV64IM-NEXT:    and t3, t0, a3
-; RV64IM-NEXT:    and t4, t0, a4
-; RV64IM-NEXT:    and t5, t0, t5
-; RV64IM-NEXT:    and t6, t0, s0
-; RV64IM-NEXT:    and s0, t0, s1
-; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s1, t0, a2
-; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s2, t0, a2
-; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s3, t0, a2
-; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s4, t0, a2
-; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s5, t0, a2
-; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s6, t0, a2
-; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s7, t0, a2
-; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s8, t0, a2
-; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s9, t0, a2
-; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s10, t0, a2
-; RV64IM-NEXT:    andi s11, t0, 64
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 128
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 1024
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul s11, t0, a2
-; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a4, t0, a2
-; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a3, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, a6
-; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t1
-; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t2
-; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srliw t2, t0, 31
-; RV64IM-NEXT:    slli t2, t2, 31
-; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a5, t0, a5
-; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul t1, t0, a6
-; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a6, t0, a6
-; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a1
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a7, t0, a7
-; RV64IM-NEXT:    mul ra, t0, ra
-; RV64IM-NEXT:    mul a6, t0, t3
-; RV64IM-NEXT:    mul t4, t0, t4
-; RV64IM-NEXT:    mul t5, t0, t5
-; RV64IM-NEXT:    mul a0, t0, t6
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul t6, t0, s0
-; RV64IM-NEXT:    mul s0, t0, s1
-; RV64IM-NEXT:    mul s1, t0, s2
-; RV64IM-NEXT:    mul s2, t0, s3
-; RV64IM-NEXT:    mul s3, t0, s4
-; RV64IM-NEXT:    mul s4, t0, s5
-; RV64IM-NEXT:    mul s5, t0, s6
-; RV64IM-NEXT:    mul s6, t0, s7
-; RV64IM-NEXT:    mul s7, t0, s8
-; RV64IM-NEXT:    mul s8, t0, s9
-; RV64IM-NEXT:    mul s9, t0, s10
-; RV64IM-NEXT:    srli s10, t0, 63
-; RV64IM-NEXT:    slli s10, s10, 63
-; RV64IM-NEXT:    mul t2, t0, t2
-; RV64IM-NEXT:    mul t0, t0, s10
-; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, a0, a1
-; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s11, t3, s11
-; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, t3, a4
-; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, t3, a3
-; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, t3, a2
-; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a7, t3, a7
-; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t6, t3, t6
-; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s5, t3, s5
-; RV64IM-NEXT:    xor a0, s10, a0
-; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, s11, t3
-; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, a5
-; RV64IM-NEXT:    xor a5, a7, ra
-; RV64IM-NEXT:    xor a7, t6, s0
-; RV64IM-NEXT:    xor t6, s5, s6
-; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, t3
-; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s0, s10, t3
-; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    xor a6, a7, s1
-; RV64IM-NEXT:    xor a7, t6, s7
-; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t1, s0, t1
-; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t3
-; RV64IM-NEXT:    xor a5, a5, t4
-; RV64IM-NEXT:    xor a6, a6, s2
-; RV64IM-NEXT:    xor a7, a7, s8
-; RV64IM-NEXT:    xor a1, a0, a1
-; RV64IM-NEXT:    xor a1, a1, t1
-; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t1
-; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, t5
-; RV64IM-NEXT:    xor a6, a6, s3
-; RV64IM-NEXT:    xor a7, a7, s9
-; RV64IM-NEXT:    xor a1, a1, a4
-; RV64IM-NEXT:    xor a3, a3, t2
-; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, a4
-; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a5, a4
-; RV64IM-NEXT:    xor a5, a6, s4
-; RV64IM-NEXT:    slli a0, a0, 56
-; RV64IM-NEXT:    xor a6, a7, t0
-; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a7, a1, t0
-; RV64IM-NEXT:    xor a1, a1, a3
-; RV64IM-NEXT:    slli a7, a7, 40
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    or a0, a0, a7
-; RV64IM-NEXT:    lui a7, 4080
-; RV64IM-NEXT:    and a2, a1, a7
-; RV64IM-NEXT:    xor a4, a1, a4
-; RV64IM-NEXT:    srli a1, a1, 8
-; RV64IM-NEXT:    slli a2, a2, 24
-; RV64IM-NEXT:    xor a5, a4, a5
-; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a1, a1, a3
-; RV64IM-NEXT:    srli a4, a4, 24
-; RV64IM-NEXT:    srliw a3, a5, 24
-; RV64IM-NEXT:    and a4, a4, a7
-; RV64IM-NEXT:    srli a7, a5, 40
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    slli a3, a3, 32
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    and a4, a7, t0
-; RV64IM-NEXT:    srli a5, a5, 56
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a4, a4, a5
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    addi sp, sp, 448
-; RV64IM-NEXT:    ret
-  %res = call i32 @llvm.clmulr.i32(i32 %a, i32 %b)
-  ret i32 %res
-}
-
-define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
-; RV32IM-LABEL: clmulr_i64:
-; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    addi sp, sp, -512
-; RV32IM-NEXT:    sw ra, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s1, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s2, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s3, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s4, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s5, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s6, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s7, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s8, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s10, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s11, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    srli t3, a0, 8
-; RV32IM-NEXT:    lui s8, 16
-; RV32IM-NEXT:    srli t4, a0, 24
-; RV32IM-NEXT:    slli s2, a0, 24
-; RV32IM-NEXT:    lui t5, 61681
-; RV32IM-NEXT:    lui t6, 209715
-; RV32IM-NEXT:    lui s0, 349525
-; RV32IM-NEXT:    srli s4, a1, 8
-; RV32IM-NEXT:    srli s1, a1, 24
-; RV32IM-NEXT:    slli s3, a1, 24
-; RV32IM-NEXT:    li s10, 1
-; RV32IM-NEXT:    lui a3, 1
-; RV32IM-NEXT:    lui a4, 2
-; RV32IM-NEXT:    lui a5, 4
-; RV32IM-NEXT:    lui a6, 8
-; RV32IM-NEXT:    lui a7, 32
-; RV32IM-NEXT:    lui t0, 64
-; RV32IM-NEXT:    lui t1, 128
-; RV32IM-NEXT:    lui t2, 256
-; RV32IM-NEXT:    lui a2, 512
-; RV32IM-NEXT:    addi s7, s8, -256
-; RV32IM-NEXT:    sw s7, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    addi s6, t5, -241
-; RV32IM-NEXT:    addi s5, t6, 819
-; RV32IM-NEXT:    addi t6, s0, 1365
-; RV32IM-NEXT:    slli s10, s10, 11
-; RV32IM-NEXT:    and t3, t3, s7
-; RV32IM-NEXT:    and a0, a0, s7
-; RV32IM-NEXT:    and t5, s4, s7
-; RV32IM-NEXT:    and a1, a1, s7
-; RV32IM-NEXT:    or t3, t3, t4
-; RV32IM-NEXT:    slli a0, a0, 8
-; RV32IM-NEXT:    or t4, t5, s1
-; RV32IM-NEXT:    slli a1, a1, 8
-; RV32IM-NEXT:    or a0, s2, a0
-; RV32IM-NEXT:    or a1, s3, a1
-; RV32IM-NEXT:    or a0, a0, t3
-; RV32IM-NEXT:    or a1, a1, t4
-; RV32IM-NEXT:    srli t3, a0, 4
-; RV32IM-NEXT:    sw s6, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a0, a0, s6
-; RV32IM-NEXT:    srli t4, a1, 4
-; RV32IM-NEXT:    and a1, a1, s6
-; RV32IM-NEXT:    and t3, t3, s6
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    and t4, t4, s6
-; RV32IM-NEXT:    slli a1, a1, 4
-; RV32IM-NEXT:    or a0, t3, a0
-; RV32IM-NEXT:    or a1, t4, a1
-; RV32IM-NEXT:    srli t3, a0, 2
-; RV32IM-NEXT:    sw s5, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a0, a0, s5
-; RV32IM-NEXT:    srli t4, a1, 2
-; RV32IM-NEXT:    and a1, a1, s5
-; RV32IM-NEXT:    and t3, t3, s5
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    and t4, t4, s5
-; RV32IM-NEXT:    slli a1, a1, 2
-; RV32IM-NEXT:    or a0, t3, a0
-; RV32IM-NEXT:    or a1, t4, a1
-; RV32IM-NEXT:    srli t3, a0, 1
-; RV32IM-NEXT:    sw t6, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a0, a0, t6
-; RV32IM-NEXT:    srli t4, a1, 1
-; RV32IM-NEXT:    and a1, a1, t6
-; RV32IM-NEXT:    and t3, t3, t6
-; RV32IM-NEXT:    slli a0, a0, 1
-; RV32IM-NEXT:    and t4, t4, t6
-; RV32IM-NEXT:    slli a1, a1, 1
-; RV32IM-NEXT:    or s2, t3, a0
-; RV32IM-NEXT:    or a0, t4, a1
-; RV32IM-NEXT:    and a1, a0, s10
-; RV32IM-NEXT:    sw a1, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, a3
-; RV32IM-NEXT:    sw a1, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, a4
-; RV32IM-NEXT:    sw a1, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, a5
-; RV32IM-NEXT:    sw a1, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, a6
-; RV32IM-NEXT:    sw a1, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, s8
-; RV32IM-NEXT:    sw a1, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, a7
-; RV32IM-NEXT:    sw a1, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and ra, a0, t0
-; RV32IM-NEXT:    and a1, a0, t1
-; RV32IM-NEXT:    sw a1, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, t2
-; RV32IM-NEXT:    sw a1, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, a0, a2
-; RV32IM-NEXT:    sw a1, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, s10
-; RV32IM-NEXT:    sw a1, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a3
-; RV32IM-NEXT:    sw a1, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a4
-; RV32IM-NEXT:    sw a1, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a5
-; RV32IM-NEXT:    sw a1, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a6
-; RV32IM-NEXT:    sw a1, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, s8
-; RV32IM-NEXT:    sw a1, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a7
-; RV32IM-NEXT:    sw a1, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, t0
-; RV32IM-NEXT:    sw a1, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, t1
-; RV32IM-NEXT:    sw a1, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, t2
-; RV32IM-NEXT:    sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a2
-; RV32IM-NEXT:    sw a1, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 1024
-; RV32IM-NEXT:    and a2, a0, a1
-; RV32IM-NEXT:    sw a2, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 2048
-; RV32IM-NEXT:    and a2, a0, a1
-; RV32IM-NEXT:    sw a2, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 4096
-; RV32IM-NEXT:    and a2, a0, a1
-; RV32IM-NEXT:    sw a2, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 8192
-; RV32IM-NEXT:    and s1, a0, a1
-; RV32IM-NEXT:    sw s1, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 16384
-; RV32IM-NEXT:    and a2, a0, a1
-; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 32768
-; RV32IM-NEXT:    and a2, a0, a1
-; RV32IM-NEXT:    sw a2, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 65536
-; RV32IM-NEXT:    and t3, a0, a1
-; RV32IM-NEXT:    sw t3, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 131072
-; RV32IM-NEXT:    and a2, a0, a1
-; RV32IM-NEXT:    sw a2, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 262144
-; RV32IM-NEXT:    and t2, a0, a1
-; RV32IM-NEXT:    sw t2, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lui a1, 524288
-; RV32IM-NEXT:    and t1, a0, a1
-; RV32IM-NEXT:    sw t1, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    and a1, s2, a1
-; RV32IM-NEXT:    sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t0, a0, 4
-; RV32IM-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t4, a0, 2
-; RV32IM-NEXT:    andi a7, a0, 1
-; RV32IM-NEXT:    sw a7, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi t5, a0, 8
-; RV32IM-NEXT:    andi a6, a0, 16
-; RV32IM-NEXT:    sw a6, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a5, a0, 32
-; RV32IM-NEXT:    sw a5, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a4, a0, 64
-; RV32IM-NEXT:    sw a4, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a3, a0, 128
-; RV32IM-NEXT:    sw a3, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi a2, a0, 256
-; RV32IM-NEXT:    andi a1, a0, 512
-; RV32IM-NEXT:    andi s11, a0, 1024
-; RV32IM-NEXT:    andi s3, s2, 1
-; RV32IM-NEXT:    andi s5, s2, 2
-; RV32IM-NEXT:    andi s7, s2, 4
-; RV32IM-NEXT:    andi t6, s2, 8
-; RV32IM-NEXT:    andi s0, s2, 16
-; RV32IM-NEXT:    sw s0, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    andi s0, s2, 32
-; RV32IM-NEXT:    andi s4, s2, 64
-; RV32IM-NEXT:    andi s6, s2, 128
-; RV32IM-NEXT:    andi s8, s2, 256
-; RV32IM-NEXT:    andi s9, s2, 512
-; RV32IM-NEXT:    andi s10, s2, 1024
-; RV32IM-NEXT:    sw s10, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, t0
-; RV32IM-NEXT:    sw s10, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, t4
-; RV32IM-NEXT:    sw s10, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a7
-; RV32IM-NEXT:    sw s10, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, t5
-; RV32IM-NEXT:    sw s10, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a6
-; RV32IM-NEXT:    sw s10, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a5
-; RV32IM-NEXT:    sw s10, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a4
-; RV32IM-NEXT:    sw s10, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a3
-; RV32IM-NEXT:    sw s10, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a2
-; RV32IM-NEXT:    mv t0, a2
-; RV32IM-NEXT:    sw s10, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, a1
-; RV32IM-NEXT:    mv a7, a1
-; RV32IM-NEXT:    sw s10, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s10, s2, s11
-; RV32IM-NEXT:    mv a6, s11
-; RV32IM-NEXT:    sw s10, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s10, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s10, s2, s10
-; RV32IM-NEXT:    sw s10, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s10, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s10, s2, s10
-; RV32IM-NEXT:    sw s10, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s10, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s10, s2, s10
-; RV32IM-NEXT:    sw s10, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s10, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s11, s2, s10
-; RV32IM-NEXT:    sw s11, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s11, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s11, s2, s11
-; RV32IM-NEXT:    sw s11, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s11, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s11, s2, s11
-; RV32IM-NEXT:    sw s11, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s11, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s11, s2, s11
-; RV32IM-NEXT:    sw s11, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s11, s2, ra
-; RV32IM-NEXT:    sw s11, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mv a5, ra
-; RV32IM-NEXT:    lw s11, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul ra, s2, s11
-; RV32IM-NEXT:    sw ra, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw ra, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul ra, s2, ra
-; RV32IM-NEXT:    sw ra, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw ra, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul ra, s2, ra
-; RV32IM-NEXT:    sw ra, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw ra, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul ra, s2, ra
-; RV32IM-NEXT:    sw ra, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw ra, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul ra, s2, ra
-; RV32IM-NEXT:    sw ra, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw ra, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul ra, s2, ra
-; RV32IM-NEXT:    sw ra, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul ra, s2, s1
-; RV32IM-NEXT:    sw ra, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw ra, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s1, s2, ra
-; RV32IM-NEXT:    sw s1, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s1, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul s1, s2, s1
-; RV32IM-NEXT:    sw s1, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s1, s2, t3
-; RV32IM-NEXT:    sw s1, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s1, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, s2, s1
-; RV32IM-NEXT:    sw a4, 180(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a4, s2, t2
-; RV32IM-NEXT:    sw a4, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a3, s2, t1
-; RV32IM-NEXT:    sw a3, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s2, a0, s3
-; RV32IM-NEXT:    sw s2, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s2, a0, s5
-; RV32IM-NEXT:    sw s2, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul s2, a0, s7
-; RV32IM-NEXT:    sw s2, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a4, a0, t6
-; RV32IM-NEXT:    sw a4, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a1
-; RV32IM-NEXT:    sw a4, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a2, a0, s0
-; RV32IM-NEXT:    sw a2, 160(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a2, a0, s4
-; RV32IM-NEXT:    sw a2, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a2, a0, s6
-; RV32IM-NEXT:    sw a2, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a2, a0, s8
-; RV32IM-NEXT:    sw a2, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a2, a0, s9
-; RV32IM-NEXT:    sw a2, 148(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a1
-; RV32IM-NEXT:    sw a2, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a2
-; RV32IM-NEXT:    sw a2, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a2
-; RV32IM-NEXT:    sw a4, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a2
-; RV32IM-NEXT:    sw a2, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a2
-; RV32IM-NEXT:    sw a2, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a2
-; RV32IM-NEXT:    sw a2, 168(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a2
-; RV32IM-NEXT:    sw a2, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a2, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a2
-; RV32IM-NEXT:    sw a2, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a1
-; RV32IM-NEXT:    sw a2, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a1
-; RV32IM-NEXT:    sw a2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a1
-; RV32IM-NEXT:    sw a2, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a1
-; RV32IM-NEXT:    sw a2, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a2, a0, a1
-; RV32IM-NEXT:    sw a2, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a1
-; RV32IM-NEXT:    sw a4, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a1
-; RV32IM-NEXT:    sw a4, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a4, a0, a1
-; RV32IM-NEXT:    sw a4, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu t6, a0, t4
-; RV32IM-NEXT:    mul a1, a0, t4
-; RV32IM-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu t2, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu t3, a0, t5
-; RV32IM-NEXT:    mul a1, a0, t5
-; RV32IM-NEXT:    sw a1, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu t4, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu s0, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu s2, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu s3, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a2, a0, t0
-; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, t0
-; RV32IM-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a2, a0, a7
-; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a7
-; RV32IM-NEXT:    sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a2, a0, a6
-; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a6
-; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a2, a0, a1
-; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a2, a0, a1
-; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a2, a0, a1
-; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a2, a0, s10
-; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, s10
-; RV32IM-NEXT:    sw a1, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a2, a0, a1
-; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a2, a0, a1
-; RV32IM-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a2, a0, a1
-; RV32IM-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a2, a0, a5
-; RV32IM-NEXT:    sw a2, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mul a1, a0, a5
-; RV32IM-NEXT:    sw a1, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mv a1, s11
-; RV32IM-NEXT:    mulhu s11, a0, s11
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a5, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu s10, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu s8, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu s9, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a7, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a3, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a2, a0, ra
-; RV32IM-NEXT:    mul a1, a0, ra
-; RV32IM-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu t5, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu t0, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    mulhu a6, a0, s1
-; RV32IM-NEXT:    mul a1, a0, s1
-; RV32IM-NEXT:    sw a1, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a1, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a4, a0, a1
-; RV32IM-NEXT:    mul a1, a0, a1
-; RV32IM-NEXT:    sw a1, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw t1, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    mulhu a1, a0, t1
-; RV32IM-NEXT:    mul a0, a0, t1
-; RV32IM-NEXT:    sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t2, a0
-; RV32IM-NEXT:    sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s7, t6, a0
-; RV32IM-NEXT:    lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s5, t3, a0
-; RV32IM-NEXT:    lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s6, t4, a0
-; RV32IM-NEXT:    lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s4, s0, a0
-; RV32IM-NEXT:    lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, s2, a0
-; RV32IM-NEXT:    sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s3, s3, a0
-; RV32IM-NEXT:    lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s2, t1, a0
-; RV32IM-NEXT:    lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t1, a0
-; RV32IM-NEXT:    sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t1, a0
-; RV32IM-NEXT:    sw a0, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s0, s0, a0
-; RV32IM-NEXT:    lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or t6, t1, a0
-; RV32IM-NEXT:    lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t1, a0
-; RV32IM-NEXT:    sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t1, a0
-; RV32IM-NEXT:    sw a0, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t1, a0
-; RV32IM-NEXT:    sw a0, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or t4, t1, a0
-; RV32IM-NEXT:    lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or t3, t1, a0
-; RV32IM-NEXT:    lw a0, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw t1, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t1, a0
-; RV32IM-NEXT:    sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, s11, a0
-; RV32IM-NEXT:    sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, a5, a0
-; RV32IM-NEXT:    sw a0, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, s10, a0
-; RV32IM-NEXT:    sw a0, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s8, s8, a0
-; RV32IM-NEXT:    lw a0, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s10, s9, a0
-; RV32IM-NEXT:    lw a0, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s11, a7, a0
-; RV32IM-NEXT:    lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, a3, a0
-; RV32IM-NEXT:    sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    sw a0, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t5, a0
-; RV32IM-NEXT:    sw a0, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or a0, t0, a0
-; RV32IM-NEXT:    sw a0, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or t2, a6, a0
-; RV32IM-NEXT:    lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or t1, a4, a0
-; RV32IM-NEXT:    lw s1, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    or s1, a1, s1
-; RV32IM-NEXT:    lw a0, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a1, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t0, a0, a1
-; RV32IM-NEXT:    lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a1, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t5, a0, a1
-; RV32IM-NEXT:    lw a0, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a1, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a6, a0, a1
-; RV32IM-NEXT:    lw a0, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a1, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s9, a0, a1
-; RV32IM-NEXT:    lw a0, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a7, a1, a0
-; RV32IM-NEXT:    lw a0, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a1, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, a0, a1
-; RV32IM-NEXT:    lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a1, a1, a2
-; RV32IM-NEXT:    lw a2, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a3, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a3
-; RV32IM-NEXT:    lw a3, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a4, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a3, a3, a4
-; RV32IM-NEXT:    lw a4, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a4, a5
-; RV32IM-NEXT:    lw a5, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw ra, 0(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, a5, ra
-; RV32IM-NEXT:    lw ra, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s7, ra, s7
-; RV32IM-NEXT:    lw ra, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s5, ra, s5
-; RV32IM-NEXT:    xor s4, s6, s4
-; RV32IM-NEXT:    xor s2, s3, s2
-; RV32IM-NEXT:    xor t6, s0, t6
-; RV32IM-NEXT:    xor t3, t4, t3
-; RV32IM-NEXT:    xor t4, s8, s10
-; RV32IM-NEXT:    xor t1, t2, t1
-; RV32IM-NEXT:    lw t2, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t0, t0, t2
-; RV32IM-NEXT:    lw t2, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t2, t5, t2
-; RV32IM-NEXT:    lw t5, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a6, a6, t5
-; RV32IM-NEXT:    lw t5, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t5, s9, t5
-; RV32IM-NEXT:    xor a0, a7, a0
-; RV32IM-NEXT:    lw a7, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a1, a1, a7
-; RV32IM-NEXT:    lw a7, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a7
-; RV32IM-NEXT:    lw a7, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a3, a3, a7
-; RV32IM-NEXT:    lw a7, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a4, a7
-; RV32IM-NEXT:    lw a7, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, a5, a7
-; RV32IM-NEXT:    xor a7, s7, s5
-; RV32IM-NEXT:    lw s0, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s0, s4, s0
-; RV32IM-NEXT:    lw s3, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor s2, s2, s3
-; RV32IM-NEXT:    lw s3, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t6, t6, s3
-; RV32IM-NEXT:    lw s3, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t3, t3, s3
-; RV32IM-NEXT:    xor t4, t4, s11
-; RV32IM-NEXT:    xor t1, t1, s1
-; RV32IM-NEXT:    lw s1, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t2, t2, s1
-; RV32IM-NEXT:    lw s1, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a6, a6, s1
-; RV32IM-NEXT:    lw s1, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t5, t5, s1
-; RV32IM-NEXT:    xor a0, a0, a1
-; RV32IM-NEXT:    lw a1, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a1, a2, a1
-; RV32IM-NEXT:    lw a2, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a3, a2
-; RV32IM-NEXT:    lw a3, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a3, a4, a3
-; RV32IM-NEXT:    lw a4, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a5, a4
-; RV32IM-NEXT:    xor a5, a7, s0
-; RV32IM-NEXT:    lw a7, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a7, s2, a7
-; RV32IM-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t6, t6, s0
-; RV32IM-NEXT:    lw s0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t3, t3, s0
-; RV32IM-NEXT:    lw s0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t4, t4, s0
-; RV32IM-NEXT:    lw s0, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t1, t1, s0
-; RV32IM-NEXT:    lw s0, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t2, t2, s0
-; RV32IM-NEXT:    lw s0, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a6, a6, s0
-; RV32IM-NEXT:    lw s0, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t5, t5, s0
-; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a0, a0, s0
-; RV32IM-NEXT:    lw s0, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a1, a1, s0
-; RV32IM-NEXT:    lw s0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, s0
-; RV32IM-NEXT:    lw s0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a3, a3, s0
-; RV32IM-NEXT:    lw s0, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a4, s0
-; RV32IM-NEXT:    xor a5, a5, a7
-; RV32IM-NEXT:    lw a7, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a7, t6, a7
-; RV32IM-NEXT:    lw t6, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t3, t3, t6
-; RV32IM-NEXT:    lw t6, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t4, t4, t6
-; RV32IM-NEXT:    lw t6, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t1, t1, t6
-; RV32IM-NEXT:    lw t6, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t2, t2, t6
-; RV32IM-NEXT:    lw t6, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t5, t5, t6
-; RV32IM-NEXT:    lw t6, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, t6
-; RV32IM-NEXT:    lw t6, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a4, t6
-; RV32IM-NEXT:    xor a5, a5, a7
-; RV32IM-NEXT:    lw a7, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a7, t3, a7
-; RV32IM-NEXT:    lw t3, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t3, t4, t3
-; RV32IM-NEXT:    lw t4, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t1, t1, t4
-; RV32IM-NEXT:    lw t4, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t2, t2, t4
-; RV32IM-NEXT:    lw t4, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t4, t5, t4
-; RV32IM-NEXT:    xor a1, a0, a1
-; RV32IM-NEXT:    xor a1, a1, a2
-; RV32IM-NEXT:    lw a2, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a4, a2
-; RV32IM-NEXT:    xor a4, a5, a7
-; RV32IM-NEXT:    lw a5, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, t3, a5
-; RV32IM-NEXT:    lw a7, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a7, t1, a7
-; RV32IM-NEXT:    lw t1, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t1, t2, t1
-; RV32IM-NEXT:    lw t2, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor t2, t4, t2
-; RV32IM-NEXT:    xor a1, a1, a3
-; RV32IM-NEXT:    lw a3, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a2, a2, a3
-; RV32IM-NEXT:    xor a4, a4, a5
-; RV32IM-NEXT:    lw a3, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a3, a7, a3
-; RV32IM-NEXT:    lw a5, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a5, t1, a5
-; RV32IM-NEXT:    xor a3, a4, a3
-; RV32IM-NEXT:    lw a4, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a5, a4
-; RV32IM-NEXT:    xor a3, a3, t0
-; RV32IM-NEXT:    slli a0, a0, 24
-; RV32IM-NEXT:    lw a5, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    xor a4, a4, a5
-; RV32IM-NEXT:    lw a7, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a5, a1, a7
-; RV32IM-NEXT:    slli a5, a5, 8
-; RV32IM-NEXT:    or a0, a0, a5
-; RV32IM-NEXT:    xor a2, a1, a2
-; RV32IM-NEXT:    srli a1, a1, 8
-; RV32IM-NEXT:    and a1, a1, a7
-; RV32IM-NEXT:    srli a2, a2, 24
-; RV32IM-NEXT:    or a1, a1, a2
-; RV32IM-NEXT:    or a0, a0, a1
-; RV32IM-NEXT:    xor a4, a3, a4
-; RV32IM-NEXT:    xor a1, a4, a6
-; RV32IM-NEXT:    and a2, a1, a7
-; RV32IM-NEXT:    xor a4, a1, t2
-; RV32IM-NEXT:    srli a1, a1, 8
-; RV32IM-NEXT:    and a1, a1, a7
-; RV32IM-NEXT:    srli a5, a0, 4
-; RV32IM-NEXT:    lw a6, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a6
-; RV32IM-NEXT:    and a5, a5, a6
-; RV32IM-NEXT:    slli a0, a0, 4
-; RV32IM-NEXT:    or a0, a5, a0
-; RV32IM-NEXT:    slli a3, a3, 24
-; RV32IM-NEXT:    slli a2, a2, 8
-; RV32IM-NEXT:    or a2, a3, a2
-; RV32IM-NEXT:    srli a4, a4, 24
-; RV32IM-NEXT:    or a1, a1, a4
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    srli a2, a0, 2
-; RV32IM-NEXT:    lw a3, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a0, a0, a3
-; RV32IM-NEXT:    and a2, a2, a3
-; RV32IM-NEXT:    slli a0, a0, 2
-; RV32IM-NEXT:    or a0, a2, a0
-; RV32IM-NEXT:    srli a2, a1, 4
-; RV32IM-NEXT:    and a1, a1, a6
-; RV32IM-NEXT:    and a2, a2, a6
-; RV32IM-NEXT:    slli a1, a1, 4
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    srli a2, a1, 2
-; RV32IM-NEXT:    and a1, a1, a3
-; RV32IM-NEXT:    and a2, a2, a3
-; RV32IM-NEXT:    srli a3, a0, 1
-; RV32IM-NEXT:    lw a5, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    and a4, a0, a5
-; RV32IM-NEXT:    and a3, a3, a5
-; RV32IM-NEXT:    slli a1, a1, 2
-; RV32IM-NEXT:    or a1, a2, a1
-; RV32IM-NEXT:    srli a0, a1, 1
-; RV32IM-NEXT:    and a1, a1, a5
-; RV32IM-NEXT:    and a0, a0, a5
-; RV32IM-NEXT:    slli a1, a1, 1
-; RV32IM-NEXT:    or a0, a0, a1
-; RV32IM-NEXT:    slli a1, a4, 1
-; RV32IM-NEXT:    or a1, a3, a1
-; RV32IM-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s10, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s11, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    addi sp, sp, 512
-; RV32IM-NEXT:    ret
-;
-; RV64IM-LABEL: clmulr_i64:
-; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    addi sp, sp, -448
-; RV64IM-NEXT:    sd ra, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s0, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s2, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s4, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s5, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s6, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s8, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s9, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s11, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srli a2, a0, 24
-; RV64IM-NEXT:    srli a6, a0, 8
-; RV64IM-NEXT:    li a3, 255
-; RV64IM-NEXT:    srli a5, a0, 40
-; RV64IM-NEXT:    lui s3, 16
-; RV64IM-NEXT:    srli s0, a0, 56
-; RV64IM-NEXT:    srliw t2, a0, 24
-; RV64IM-NEXT:    slli t0, a0, 56
-; RV64IM-NEXT:    lui t3, 61681
-; RV64IM-NEXT:    lui t4, 209715
-; RV64IM-NEXT:    lui t6, 349525
-; RV64IM-NEXT:    li a7, 1
-; RV64IM-NEXT:    lui s5, 2
-; RV64IM-NEXT:    lui t1, 4
-; RV64IM-NEXT:    lui a4, 128
-; RV64IM-NEXT:    lui s7, 256
-; RV64IM-NEXT:    lui s8, 4096
-; RV64IM-NEXT:    lui s10, 8192
-; RV64IM-NEXT:    lui a1, 4080
-; RV64IM-NEXT:    and a2, a2, a1
-; RV64IM-NEXT:    slli a3, a3, 24
-; RV64IM-NEXT:    sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    addi s1, s3, -256
-; RV64IM-NEXT:    and t5, a0, a1
-; RV64IM-NEXT:    slli a1, t2, 32
-; RV64IM-NEXT:    addi s9, t3, -241
-; RV64IM-NEXT:    addi t4, t4, 819
-; RV64IM-NEXT:    addi t2, t6, 1365
-; RV64IM-NEXT:    slli t3, a7, 11
-; RV64IM-NEXT:    slli s11, a7, 32
-; RV64IM-NEXT:    slli ra, a7, 33
-; RV64IM-NEXT:    slli t6, a7, 34
-; RV64IM-NEXT:    slli s2, a7, 35
-; RV64IM-NEXT:    slli s4, a7, 36
-; RV64IM-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a6, a3
-; RV64IM-NEXT:    or a2, a3, a2
-; RV64IM-NEXT:    slli a3, a7, 37
-; RV64IM-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    sd s1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a3, a5, s1
-; RV64IM-NEXT:    or a3, a3, s0
-; RV64IM-NEXT:    slli a5, a7, 38
-; RV64IM-NEXT:    sd a5, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t5, t5, 24
-; RV64IM-NEXT:    and a0, a0, s1
-; RV64IM-NEXT:    or a1, t5, a1
-; RV64IM-NEXT:    slli a5, s9, 32
-; RV64IM-NEXT:    add a5, s9, a5
-; RV64IM-NEXT:    slli s0, t4, 32
-; RV64IM-NEXT:    add t4, t4, s0
-; RV64IM-NEXT:    slli s4, t2, 32
-; RV64IM-NEXT:    slli a0, a0, 40
-; RV64IM-NEXT:    add t2, t2, s4
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a0, t0, a0
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    sd a5, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, a5
-; RV64IM-NEXT:    and a1, a1, a5
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    sd t4, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t4
-; RV64IM-NEXT:    and a1, a1, t4
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    sd t2, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, a0, t2
-; RV64IM-NEXT:    and a1, a1, t2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or t0, a1, a0
-; RV64IM-NEXT:    andi a0, t0, 2
-; RV64IM-NEXT:    andi a1, t0, 1
-; RV64IM-NEXT:    andi a2, t0, 4
-; RV64IM-NEXT:    andi a3, t0, 8
-; RV64IM-NEXT:    andi a5, t0, 16
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 32
-; RV64IM-NEXT:    mul a1, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a3
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a1, t0, 256
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a2, a0
-; RV64IM-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi a0, t0, 512
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    xor a0, a1, a0
-; RV64IM-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli t4, a7, 39
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    and a1, t0, t1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 40
-; RV64IM-NEXT:    and a1, t0, a4
-; RV64IM-NEXT:    and a2, t0, s7
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 41
-; RV64IM-NEXT:    and a2, t0, s8
-; RV64IM-NEXT:    and a3, t0, s10
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    xor a2, a2, a3
-; RV64IM-NEXT:    sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 48
-; RV64IM-NEXT:    and a3, t0, s11
-; RV64IM-NEXT:    and a4, t0, ra
-; RV64IM-NEXT:    mul a3, t0, a3
-; RV64IM-NEXT:    mul a4, t0, a4
-; RV64IM-NEXT:    xor a3, a3, a4
-; RV64IM-NEXT:    sd a3, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a3, a7, 49
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 56
-; RV64IM-NEXT:    and a1, t0, a2
-; RV64IM-NEXT:    and a2, t0, a3
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a1, a7, 57
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, a1
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    mul a1, t0, a1
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a2, a7, 42
-; RV64IM-NEXT:    slli ra, a7, 43
-; RV64IM-NEXT:    slli a3, a7, 44
-; RV64IM-NEXT:    slli a4, a7, 45
-; RV64IM-NEXT:    slli t5, a7, 46
-; RV64IM-NEXT:    slli s0, a7, 47
-; RV64IM-NEXT:    slli s1, a7, 50
-; RV64IM-NEXT:    slli a0, a7, 51
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 52
-; RV64IM-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 53
-; RV64IM-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 54
-; RV64IM-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 55
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 58
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 59
-; RV64IM-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 60
-; RV64IM-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a0, a7, 61
-; RV64IM-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    slli a7, a7, 62
-; RV64IM-NEXT:    sd a7, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, t3
-; RV64IM-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s7, 1
-; RV64IM-NEXT:    and a0, t0, s7
-; RV64IM-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s8, 8
-; RV64IM-NEXT:    and a0, t0, s8
-; RV64IM-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s3
-; RV64IM-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s6, 32
-; RV64IM-NEXT:    and a0, t0, s6
-; RV64IM-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s10, 64
-; RV64IM-NEXT:    and a0, t0, s10
-; RV64IM-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s11, 512
-; RV64IM-NEXT:    and a0, t0, s11
-; RV64IM-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s4, 1024
-; RV64IM-NEXT:    and a0, t0, s4
-; RV64IM-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s5, 2048
-; RV64IM-NEXT:    and a0, t0, s5
-; RV64IM-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui s9, 16384
-; RV64IM-NEXT:    and a0, t0, s9
-; RV64IM-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    lui a5, 32768
-; RV64IM-NEXT:    and a5, t0, a5
-; RV64IM-NEXT:    lui a6, 65536
-; RV64IM-NEXT:    and a6, t0, a6
-; RV64IM-NEXT:    lui t1, 131072
-; RV64IM-NEXT:    and t1, t0, t1
-; RV64IM-NEXT:    lui t2, 262144
-; RV64IM-NEXT:    and t2, t0, t2
-; RV64IM-NEXT:    and a0, t0, t6
-; RV64IM-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    and a0, t0, s2
-; RV64IM-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, t0, a0
-; RV64IM-NEXT:    and a1, t0, t4
-; RV64IM-NEXT:    and a7, t0, a2
-; RV64IM-NEXT:    and ra, t0, ra
-; RV64IM-NEXT:    and t3, t0, a3
-; RV64IM-NEXT:    and t4, t0, a4
-; RV64IM-NEXT:    and t5, t0, t5
-; RV64IM-NEXT:    and t6, t0, s0
-; RV64IM-NEXT:    and s0, t0, s1
-; RV64IM-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s1, t0, a2
-; RV64IM-NEXT:    ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s2, t0, a2
-; RV64IM-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s3, t0, a2
-; RV64IM-NEXT:    ld a2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s4, t0, a2
-; RV64IM-NEXT:    ld a2, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s5, t0, a2
-; RV64IM-NEXT:    ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s6, t0, a2
-; RV64IM-NEXT:    ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s7, t0, a2
-; RV64IM-NEXT:    ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s8, t0, a2
-; RV64IM-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s9, t0, a2
-; RV64IM-NEXT:    ld a2, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and s10, t0, a2
-; RV64IM-NEXT:    andi s11, t0, 64
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 128
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    andi s11, t0, 1024
-; RV64IM-NEXT:    mul a2, t0, s11
-; RV64IM-NEXT:    sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul s11, t0, a2
-; RV64IM-NEXT:    ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a4, t0, a2
-; RV64IM-NEXT:    ld a2, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    ld a2, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a3, t0, a2
-; RV64IM-NEXT:    mul a2, t0, a5
-; RV64IM-NEXT:    sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, a6
-; RV64IM-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t1
-; RV64IM-NEXT:    sd a2, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a2, t0, t2
-; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    srliw t2, t0, 31
-; RV64IM-NEXT:    slli t2, t2, 31
-; RV64IM-NEXT:    ld a2, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a2, t0, a2
-; RV64IM-NEXT:    ld a5, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a5, t0, a5
-; RV64IM-NEXT:    ld a6, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul t1, t0, a6
-; RV64IM-NEXT:    ld a6, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    mul a6, t0, a6
-; RV64IM-NEXT:    sd a6, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a0
-; RV64IM-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a0, t0, a1
-; RV64IM-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul a7, t0, a7
-; RV64IM-NEXT:    mul ra, t0, ra
-; RV64IM-NEXT:    mul a6, t0, t3
-; RV64IM-NEXT:    mul t4, t0, t4
-; RV64IM-NEXT:    mul t5, t0, t5
-; RV64IM-NEXT:    mul a0, t0, t6
-; RV64IM-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT:    mul t6, t0, s0
-; RV64IM-NEXT:    mul s0, t0, s1
-; RV64IM-NEXT:    mul s1, t0, s2
-; RV64IM-NEXT:    mul s2, t0, s3
-; RV64IM-NEXT:    mul s3, t0, s4
-; RV64IM-NEXT:    mul s4, t0, s5
-; RV64IM-NEXT:    mul s5, t0, s6
-; RV64IM-NEXT:    mul s6, t0, s7
-; RV64IM-NEXT:    mul s7, t0, s8
-; RV64IM-NEXT:    mul s8, t0, s9
-; RV64IM-NEXT:    mul s9, t0, s10
-; RV64IM-NEXT:    srli s10, t0, 63
-; RV64IM-NEXT:    slli s10, s10, 63
-; RV64IM-NEXT:    mul t2, t0, t2
-; RV64IM-NEXT:    mul t0, t0, s10
-; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, a0, a1
-; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, a1
-; RV64IM-NEXT:    ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld t3, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s11, t3, s11
-; RV64IM-NEXT:    ld t3, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, t3, a4
-; RV64IM-NEXT:    ld t3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, t3, a3
-; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, t3, a2
-; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a7, t3, a7
-; RV64IM-NEXT:    ld t3, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t6, t3, t6
-; RV64IM-NEXT:    ld t3, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s5, t3, s5
-; RV64IM-NEXT:    xor a0, s10, a0
-; RV64IM-NEXT:    ld t3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s10, s11, t3
-; RV64IM-NEXT:    ld t3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, a5
-; RV64IM-NEXT:    xor a5, a7, ra
-; RV64IM-NEXT:    xor a7, t6, s0
-; RV64IM-NEXT:    xor t6, s5, s6
-; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a0, a0, t3
-; RV64IM-NEXT:    ld t3, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a1, a1, t3
-; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor s0, s10, t3
-; RV64IM-NEXT:    ld t3, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a4, t3
-; RV64IM-NEXT:    ld t3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    xor a6, a7, s1
-; RV64IM-NEXT:    xor a7, t6, s7
-; RV64IM-NEXT:    ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor t1, s0, t1
-; RV64IM-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t3
-; RV64IM-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t3
-; RV64IM-NEXT:    xor a5, a5, t4
-; RV64IM-NEXT:    xor a6, a6, s2
-; RV64IM-NEXT:    xor a7, a7, s8
-; RV64IM-NEXT:    xor a1, a0, a1
-; RV64IM-NEXT:    xor a1, a1, t1
-; RV64IM-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a3, a3, t1
-; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, t1
-; RV64IM-NEXT:    xor a5, a5, t5
-; RV64IM-NEXT:    xor a6, a6, s3
-; RV64IM-NEXT:    xor a7, a7, s9
-; RV64IM-NEXT:    xor a1, a1, a4
-; RV64IM-NEXT:    xor a3, a3, t2
-; RV64IM-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a2, a2, a4
-; RV64IM-NEXT:    ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    xor a4, a5, a4
-; RV64IM-NEXT:    xor a5, a6, s4
-; RV64IM-NEXT:    slli a0, a0, 56
-; RV64IM-NEXT:    xor a6, a7, t0
-; RV64IM-NEXT:    ld t0, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a7, a1, t0
-; RV64IM-NEXT:    xor a1, a1, a3
-; RV64IM-NEXT:    slli a7, a7, 40
-; RV64IM-NEXT:    xor a1, a1, a2
-; RV64IM-NEXT:    or a0, a0, a7
-; RV64IM-NEXT:    lui a7, 4080
-; RV64IM-NEXT:    and a2, a1, a7
-; RV64IM-NEXT:    xor a4, a1, a4
-; RV64IM-NEXT:    srli a1, a1, 8
-; RV64IM-NEXT:    slli a2, a2, 24
-; RV64IM-NEXT:    xor a5, a4, a5
-; RV64IM-NEXT:    ld a3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a1, a1, a3
-; RV64IM-NEXT:    srli a4, a4, 24
-; RV64IM-NEXT:    srliw a3, a5, 24
-; RV64IM-NEXT:    and a4, a4, a7
-; RV64IM-NEXT:    srli a7, a5, 40
-; RV64IM-NEXT:    xor a5, a5, a6
-; RV64IM-NEXT:    slli a3, a3, 32
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    and a4, a7, t0
-; RV64IM-NEXT:    srli a5, a5, 56
-; RV64IM-NEXT:    or a2, a2, a3
-; RV64IM-NEXT:    or a4, a4, a5
-; RV64IM-NEXT:    or a0, a0, a2
-; RV64IM-NEXT:    or a1, a1, a4
-; RV64IM-NEXT:    or a0, a0, a1
-; RV64IM-NEXT:    srli a1, a0, 4
-; RV64IM-NEXT:    ld a2, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 4
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 2
-; RV64IM-NEXT:    ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 2
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    srli a1, a0, 1
-; RV64IM-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    and a0, a0, a2
-; RV64IM-NEXT:    and a1, a1, a2
-; RV64IM-NEXT:    slli a0, a0, 1
-; RV64IM-NEXT:    or a0, a1, a0
-; RV64IM-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s0, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s2, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s5, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s7, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s8, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s9, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s10, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    ld s11, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT:    addi sp, sp, 448
-; RV64IM-NEXT:    ret
-  %res = call i64 @llvm.clmulr.i64(i64 %a, i64 %b)
-  ret i64 %res
-}
-
-define i4 @clmulr_constfold_i4() nounwind {
-; CHECK-LABEL: clmulr_constfold_i4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    ret
-  %res = call i4 @llvm.clmulr.i4(i4 1, i4 2)
-  ret i4 %res
-}
-
-define i16 @clmulr_constfold_i16() nounwind {
-; CHECK-LABEL: clmulr_constfold_i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 11
-; CHECK-NEXT:    addi a0, a0, -1365
-; CHECK-NEXT:    ret
-  %res = call i16 @llvm.clmulr.i16(i16 -2, i16 -1)
-  ret i16 %res
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
index dd04be1212587..ff4f1646afd2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
@@ -5547,18642 +5547,3 @@ define <vscale x 8 x i64> @clmul_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i6
   %a = call <vscale x 8 x i64> @llvm.clmul.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
   ret <vscale x 8 x i64> %a
 }
-
-define <vscale x 1 x i32> @clmulr_nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y) nounwind {
-; CHECK-LABEL: clmulr_nxv1i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    lui a4, 16
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vsll.vi v11, v8, 24
-; CHECK-NEXT:    lui a0, 61681
-; CHECK-NEXT:    lui a1, 209715
-; CHECK-NEXT:    lui a5, 349525
-; CHECK-NEXT:    li a6, 16
-; CHECK-NEXT:    addi a3, a4, -256
-; CHECK-NEXT:    addi a2, a0, -241
-; CHECK-NEXT:    addi a1, a1, 819
-; CHECK-NEXT:    addi a0, a5, 1365
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v11, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vand.vx v9, v8, a6
-; CHECK-NEXT:    li a5, 32
-; CHECK-NEXT:    vand.vx v10, v8, a5
-; CHECK-NEXT:    li a5, 64
-; CHECK-NEXT:    vand.vx v11, v8, a5
-; CHECK-NEXT:    li a5, 128
-; CHECK-NEXT:    vand.vx v12, v8, a5
-; CHECK-NEXT:    li a5, 256
-; CHECK-NEXT:    vand.vx v13, v8, a5
-; CHECK-NEXT:    li a5, 512
-; CHECK-NEXT:    vand.vx v14, v8, a5
-; CHECK-NEXT:    li a5, 1024
-; CHECK-NEXT:    vand.vx v15, v8, a5
-; CHECK-NEXT:    li a5, 1
-; CHECK-NEXT:    slli a5, a5, 11
-; CHECK-NEXT:    vand.vx v16, v8, a5
-; CHECK-NEXT:    lui a5, 1
-; CHECK-NEXT:    vand.vx v17, v8, a5
-; CHECK-NEXT:    lui a5, 2
-; CHECK-NEXT:    vand.vx v18, v8, a5
-; CHECK-NEXT:    lui a5, 4
-; CHECK-NEXT:    vand.vx v19, v8, a5
-; CHECK-NEXT:    lui a5, 8
-; CHECK-NEXT:    vand.vx v20, v8, a5
-; CHECK-NEXT:    lui a5, 32
-; CHECK-NEXT:    vand.vx v21, v8, a4
-; CHECK-NEXT:    lui a4, 64
-; CHECK-NEXT:    vand.vx v22, v8, a5
-; CHECK-NEXT:    lui a5, 128
-; CHECK-NEXT:    vand.vx v23, v8, a4
-; CHECK-NEXT:    lui a4, 256
-; CHECK-NEXT:    vand.vx v24, v8, a5
-; CHECK-NEXT:    lui a5, 512
-; CHECK-NEXT:    vand.vx v25, v8, a4
-; CHECK-NEXT:    lui a4, 1024
-; CHECK-NEXT:    vand.vx v26, v8, a5
-; CHECK-NEXT:    lui a5, 2048
-; CHECK-NEXT:    vand.vx v27, v8, a4
-; CHECK-NEXT:    lui a4, 4096
-; CHECK-NEXT:    vand.vx v28, v8, a5
-; CHECK-NEXT:    lui a5, 8192
-; CHECK-NEXT:    vand.vx v29, v8, a4
-; CHECK-NEXT:    lui a4, 16384
-; CHECK-NEXT:    vand.vx v30, v8, a5
-; CHECK-NEXT:    lui a5, 32768
-; CHECK-NEXT:    vand.vx v31, v8, a4
-; CHECK-NEXT:    lui a4, 65536
-; CHECK-NEXT:    vand.vx v7, v8, a5
-; CHECK-NEXT:    lui a5, 131072
-; CHECK-NEXT:    vand.vx v6, v8, a4
-; CHECK-NEXT:    lui a4, 262144
-; CHECK-NEXT:    vand.vx v5, v8, a5
-; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    vand.vi v4, v8, 2
-; CHECK-NEXT:    vand.vi v3, v8, 1
-; CHECK-NEXT:    vand.vi v2, v8, 4
-; CHECK-NEXT:    vand.vi v1, v8, 8
-; CHECK-NEXT:    vand.vx v0, v8, a4
-; CHECK-NEXT:    vmul.vv v4, v8, v4
-; CHECK-NEXT:    vmul.vv v3, v8, v3
-; CHECK-NEXT:    vmul.vv v2, v8, v2
-; CHECK-NEXT:    vmul.vv v1, v8, v1
-; CHECK-NEXT:    vmul.vv v9, v8, v9
-; CHECK-NEXT:    vmul.vv v10, v8, v10
-; CHECK-NEXT:    vmul.vv v11, v8, v11
-; CHECK-NEXT:    vmul.vv v12, v8, v12
-; CHECK-NEXT:    vmul.vv v13, v8, v13
-; CHECK-NEXT:    vmul.vv v14, v8, v14
-; CHECK-NEXT:    vmul.vv v15, v8, v15
-; CHECK-NEXT:    vmul.vv v16, v8, v16
-; CHECK-NEXT:    vmul.vv v17, v8, v17
-; CHECK-NEXT:    vmul.vv v18, v8, v18
-; CHECK-NEXT:    vmul.vv v19, v8, v19
-; CHECK-NEXT:    vmul.vv v20, v8, v20
-; CHECK-NEXT:    vmul.vv v21, v8, v21
-; CHECK-NEXT:    vmul.vv v22, v8, v22
-; CHECK-NEXT:    vmul.vv v23, v8, v23
-; CHECK-NEXT:    vmul.vv v24, v8, v24
-; CHECK-NEXT:    vmul.vv v25, v8, v25
-; CHECK-NEXT:    vmul.vv v26, v8, v26
-; CHECK-NEXT:    vmul.vv v27, v8, v27
-; CHECK-NEXT:    vmul.vv v28, v8, v28
-; CHECK-NEXT:    vmul.vv v29, v8, v29
-; CHECK-NEXT:    vmul.vv v30, v8, v30
-; CHECK-NEXT:    vmul.vv v31, v8, v31
-; CHECK-NEXT:    vmul.vv v7, v8, v7
-; CHECK-NEXT:    vmul.vv v6, v8, v6
-; CHECK-NEXT:    vmul.vv v5, v8, v5
-; CHECK-NEXT:    vmul.vv v0, v8, v0
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a4) # vscale x 8-byte Folded Spill
-; CHECK-NEXT:    vand.vx v0, v8, a5
-; CHECK-NEXT:    vmul.vv v8, v8, v0
-; CHECK-NEXT:    vxor.vv v4, v3, v4
-; CHECK-NEXT:    vxor.vv v4, v4, v2
-; CHECK-NEXT:    vxor.vv v4, v4, v1
-; CHECK-NEXT:    vxor.vv v9, v4, v9
-; CHECK-NEXT:    vxor.vv v9, v9, v10
-; CHECK-NEXT:    vxor.vv v9, v9, v11
-; CHECK-NEXT:    vxor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v10, v9, v13
-; CHECK-NEXT:    vxor.vv v10, v10, v14
-; CHECK-NEXT:    vxor.vv v10, v10, v15
-; CHECK-NEXT:    vxor.vv v10, v10, v16
-; CHECK-NEXT:    vxor.vv v10, v10, v17
-; CHECK-NEXT:    vxor.vv v10, v10, v18
-; CHECK-NEXT:    vxor.vv v10, v10, v19
-; CHECK-NEXT:    vxor.vv v10, v10, v20
-; CHECK-NEXT:    vxor.vv v10, v10, v21
-; CHECK-NEXT:    vxor.vv v10, v10, v22
-; CHECK-NEXT:    vxor.vv v10, v10, v23
-; CHECK-NEXT:    vxor.vv v10, v10, v24
-; CHECK-NEXT:    vxor.vv v10, v10, v25
-; CHECK-NEXT:    vxor.vv v10, v10, v26
-; CHECK-NEXT:    vxor.vv v10, v10, v27
-; CHECK-NEXT:    vxor.vv v10, v10, v28
-; CHECK-NEXT:    vsll.vi v9, v9, 24
-; CHECK-NEXT:    vxor.vv v11, v10, v29
-; CHECK-NEXT:    vxor.vv v11, v11, v30
-; CHECK-NEXT:    vand.vx v12, v10, a3
-; CHECK-NEXT:    vsll.vi v12, v12, 8
-; CHECK-NEXT:    vor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v11, v11, v31
-; CHECK-NEXT:    vxor.vv v11, v11, v7
-; CHECK-NEXT:    vxor.vv v11, v11, v6
-; CHECK-NEXT:    vxor.vv v11, v11, v5
-; CHECK-NEXT:    vsrl.vi v10, v10, 8
-; CHECK-NEXT:    vand.vx v10, v10, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vl1r.v v12, (a3) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vxor.vv v11, v11, v12
-; CHECK-NEXT:    vxor.vv v8, v11, v8
-; CHECK-NEXT:    vsrl.vi v8, v8, 24
-; CHECK-NEXT:    vor.vv v8, v10, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
-  %a = call <vscale x 1 x i32> @llvm.clmulr.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y)
-  ret <vscale x 1 x i32> %a
-}
-
-define <vscale x 2 x i32> @clmulr_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) nounwind {
-; CHECK-LABEL: clmulr_nxv2i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    lui a4, 16
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vsll.vi v11, v8, 24
-; CHECK-NEXT:    lui a0, 61681
-; CHECK-NEXT:    lui a1, 209715
-; CHECK-NEXT:    lui a5, 349525
-; CHECK-NEXT:    li a6, 16
-; CHECK-NEXT:    addi a3, a4, -256
-; CHECK-NEXT:    addi a2, a0, -241
-; CHECK-NEXT:    addi a1, a1, 819
-; CHECK-NEXT:    addi a0, a5, 1365
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v11, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vand.vx v9, v8, a6
-; CHECK-NEXT:    li a5, 32
-; CHECK-NEXT:    vand.vx v10, v8, a5
-; CHECK-NEXT:    li a5, 64
-; CHECK-NEXT:    vand.vx v11, v8, a5
-; CHECK-NEXT:    li a5, 128
-; CHECK-NEXT:    vand.vx v12, v8, a5
-; CHECK-NEXT:    li a5, 256
-; CHECK-NEXT:    vand.vx v13, v8, a5
-; CHECK-NEXT:    li a5, 512
-; CHECK-NEXT:    vand.vx v14, v8, a5
-; CHECK-NEXT:    li a5, 1024
-; CHECK-NEXT:    vand.vx v15, v8, a5
-; CHECK-NEXT:    li a5, 1
-; CHECK-NEXT:    slli a5, a5, 11
-; CHECK-NEXT:    vand.vx v16, v8, a5
-; CHECK-NEXT:    lui a5, 1
-; CHECK-NEXT:    vand.vx v17, v8, a5
-; CHECK-NEXT:    lui a5, 2
-; CHECK-NEXT:    vand.vx v18, v8, a5
-; CHECK-NEXT:    lui a5, 4
-; CHECK-NEXT:    vand.vx v19, v8, a5
-; CHECK-NEXT:    lui a5, 8
-; CHECK-NEXT:    vand.vx v20, v8, a5
-; CHECK-NEXT:    lui a5, 32
-; CHECK-NEXT:    vand.vx v21, v8, a4
-; CHECK-NEXT:    lui a4, 64
-; CHECK-NEXT:    vand.vx v22, v8, a5
-; CHECK-NEXT:    lui a5, 128
-; CHECK-NEXT:    vand.vx v23, v8, a4
-; CHECK-NEXT:    lui a4, 256
-; CHECK-NEXT:    vand.vx v24, v8, a5
-; CHECK-NEXT:    lui a5, 512
-; CHECK-NEXT:    vand.vx v25, v8, a4
-; CHECK-NEXT:    lui a4, 1024
-; CHECK-NEXT:    vand.vx v26, v8, a5
-; CHECK-NEXT:    lui a5, 2048
-; CHECK-NEXT:    vand.vx v27, v8, a4
-; CHECK-NEXT:    lui a4, 4096
-; CHECK-NEXT:    vand.vx v28, v8, a5
-; CHECK-NEXT:    lui a5, 8192
-; CHECK-NEXT:    vand.vx v29, v8, a4
-; CHECK-NEXT:    lui a4, 16384
-; CHECK-NEXT:    vand.vx v30, v8, a5
-; CHECK-NEXT:    lui a5, 32768
-; CHECK-NEXT:    vand.vx v31, v8, a4
-; CHECK-NEXT:    lui a4, 65536
-; CHECK-NEXT:    vand.vx v7, v8, a5
-; CHECK-NEXT:    lui a5, 131072
-; CHECK-NEXT:    vand.vx v6, v8, a4
-; CHECK-NEXT:    lui a4, 262144
-; CHECK-NEXT:    vand.vx v5, v8, a5
-; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    vand.vi v4, v8, 2
-; CHECK-NEXT:    vand.vi v3, v8, 1
-; CHECK-NEXT:    vand.vi v2, v8, 4
-; CHECK-NEXT:    vand.vi v1, v8, 8
-; CHECK-NEXT:    vand.vx v0, v8, a4
-; CHECK-NEXT:    vmul.vv v4, v8, v4
-; CHECK-NEXT:    vmul.vv v3, v8, v3
-; CHECK-NEXT:    vmul.vv v2, v8, v2
-; CHECK-NEXT:    vmul.vv v1, v8, v1
-; CHECK-NEXT:    vmul.vv v9, v8, v9
-; CHECK-NEXT:    vmul.vv v10, v8, v10
-; CHECK-NEXT:    vmul.vv v11, v8, v11
-; CHECK-NEXT:    vmul.vv v12, v8, v12
-; CHECK-NEXT:    vmul.vv v13, v8, v13
-; CHECK-NEXT:    vmul.vv v14, v8, v14
-; CHECK-NEXT:    vmul.vv v15, v8, v15
-; CHECK-NEXT:    vmul.vv v16, v8, v16
-; CHECK-NEXT:    vmul.vv v17, v8, v17
-; CHECK-NEXT:    vmul.vv v18, v8, v18
-; CHECK-NEXT:    vmul.vv v19, v8, v19
-; CHECK-NEXT:    vmul.vv v20, v8, v20
-; CHECK-NEXT:    vmul.vv v21, v8, v21
-; CHECK-NEXT:    vmul.vv v22, v8, v22
-; CHECK-NEXT:    vmul.vv v23, v8, v23
-; CHECK-NEXT:    vmul.vv v24, v8, v24
-; CHECK-NEXT:    vmul.vv v25, v8, v25
-; CHECK-NEXT:    vmul.vv v26, v8, v26
-; CHECK-NEXT:    vmul.vv v27, v8, v27
-; CHECK-NEXT:    vmul.vv v28, v8, v28
-; CHECK-NEXT:    vmul.vv v29, v8, v29
-; CHECK-NEXT:    vmul.vv v30, v8, v30
-; CHECK-NEXT:    vmul.vv v31, v8, v31
-; CHECK-NEXT:    vmul.vv v7, v8, v7
-; CHECK-NEXT:    vmul.vv v6, v8, v6
-; CHECK-NEXT:    vmul.vv v5, v8, v5
-; CHECK-NEXT:    vmul.vv v0, v8, v0
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a4) # vscale x 8-byte Folded Spill
-; CHECK-NEXT:    vand.vx v0, v8, a5
-; CHECK-NEXT:    vmul.vv v8, v8, v0
-; CHECK-NEXT:    vxor.vv v4, v3, v4
-; CHECK-NEXT:    vxor.vv v4, v4, v2
-; CHECK-NEXT:    vxor.vv v4, v4, v1
-; CHECK-NEXT:    vxor.vv v9, v4, v9
-; CHECK-NEXT:    vxor.vv v9, v9, v10
-; CHECK-NEXT:    vxor.vv v9, v9, v11
-; CHECK-NEXT:    vxor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v10, v9, v13
-; CHECK-NEXT:    vxor.vv v10, v10, v14
-; CHECK-NEXT:    vxor.vv v10, v10, v15
-; CHECK-NEXT:    vxor.vv v10, v10, v16
-; CHECK-NEXT:    vxor.vv v10, v10, v17
-; CHECK-NEXT:    vxor.vv v10, v10, v18
-; CHECK-NEXT:    vxor.vv v10, v10, v19
-; CHECK-NEXT:    vxor.vv v10, v10, v20
-; CHECK-NEXT:    vxor.vv v10, v10, v21
-; CHECK-NEXT:    vxor.vv v10, v10, v22
-; CHECK-NEXT:    vxor.vv v10, v10, v23
-; CHECK-NEXT:    vxor.vv v10, v10, v24
-; CHECK-NEXT:    vxor.vv v10, v10, v25
-; CHECK-NEXT:    vxor.vv v10, v10, v26
-; CHECK-NEXT:    vxor.vv v10, v10, v27
-; CHECK-NEXT:    vxor.vv v10, v10, v28
-; CHECK-NEXT:    vsll.vi v9, v9, 24
-; CHECK-NEXT:    vxor.vv v11, v10, v29
-; CHECK-NEXT:    vxor.vv v11, v11, v30
-; CHECK-NEXT:    vand.vx v12, v10, a3
-; CHECK-NEXT:    vsll.vi v12, v12, 8
-; CHECK-NEXT:    vor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v11, v11, v31
-; CHECK-NEXT:    vxor.vv v11, v11, v7
-; CHECK-NEXT:    vxor.vv v11, v11, v6
-; CHECK-NEXT:    vxor.vv v11, v11, v5
-; CHECK-NEXT:    vsrl.vi v10, v10, 8
-; CHECK-NEXT:    vand.vx v10, v10, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vl1r.v v12, (a3) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vxor.vv v11, v11, v12
-; CHECK-NEXT:    vxor.vv v8, v11, v8
-; CHECK-NEXT:    vsrl.vi v8, v8, 24
-; CHECK-NEXT:    vor.vv v8, v10, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
-  %a = call <vscale x 2 x i32> @llvm.clmulr.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
-  ret <vscale x 2 x i32> %a
-}
-
-define <vscale x 4 x i32> @clmulr_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) nounwind {
-; RV32-LABEL: clmulr_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -64
-; RV32-NEXT:    sw s0, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vsll.vi v14, v8, 24
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    lui s6, 349525
-; RV32-NEXT:    li t2, 16
-; RV32-NEXT:    li t5, 32
-; RV32-NEXT:    li s2, 64
-; RV32-NEXT:    li s5, 128
-; RV32-NEXT:    li s4, 256
-; RV32-NEXT:    li s3, 512
-; RV32-NEXT:    li s1, 1024
-; RV32-NEXT:    li s0, 1
-; RV32-NEXT:    lui t6, 1
-; RV32-NEXT:    lui t4, 2
-; RV32-NEXT:    lui t3, 4
-; RV32-NEXT:    lui a5, 8
-; RV32-NEXT:    lui a6, 32
-; RV32-NEXT:    lui a7, 64
-; RV32-NEXT:    lui t0, 128
-; RV32-NEXT:    lui t1, 256
-; RV32-NEXT:    addi a4, a0, -256
-; RV32-NEXT:    addi a3, a1, -241
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    addi a1, s6, 1365
-; RV32-NEXT:    vand.vx v10, v10, a4
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v14, v8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v10, v10, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v10, v10, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v10, v10, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vand.vx v10, v8, t2
-; RV32-NEXT:    lui t2, 512
-; RV32-NEXT:    vand.vx v12, v8, t5
-; RV32-NEXT:    lui t5, 1024
-; RV32-NEXT:    vand.vx v14, v8, s2
-; RV32-NEXT:    lui s2, 2048
-; RV32-NEXT:    vand.vx v16, v8, s5
-; RV32-NEXT:    lui s5, 4096
-; RV32-NEXT:    vand.vx v26, v8, s4
-; RV32-NEXT:    lui s4, 8192
-; RV32-NEXT:    vand.vx v28, v8, s3
-; RV32-NEXT:    lui s3, 16384
-; RV32-NEXT:    vand.vx v18, v8, s1
-; RV32-NEXT:    lui s1, 32768
-; RV32-NEXT:    slli s0, s0, 11
-; RV32-NEXT:    vand.vx v20, v8, s0
-; RV32-NEXT:    lui s0, 65536
-; RV32-NEXT:    vand.vx v22, v8, t6
-; RV32-NEXT:    lui t6, 131072
-; RV32-NEXT:    vand.vx v24, v8, t4
-; RV32-NEXT:    lui t4, 262144
-; RV32-NEXT:    vand.vx v30, v8, t3
-; RV32-NEXT:    lui t3, 524288
-; RV32-NEXT:    vand.vi v6, v8, 2
-; RV32-NEXT:    vand.vi v4, v8, 1
-; RV32-NEXT:    vand.vi v2, v8, 4
-; RV32-NEXT:    vand.vi v0, v8, 8
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v6, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v6, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v6, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v26
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v18
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v22
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v24
-; RV32-NEXT:    csrr s6, vlenb
-; RV32-NEXT:    slli s6, s6, 1
-; RV32-NEXT:    mv a0, s6
-; RV32-NEXT:    slli s6, s6, 2
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s6, sp, s6
-; RV32-NEXT:    addi s6, s6, 32
-; RV32-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v30
-; RV32-NEXT:    csrr s6, vlenb
-; RV32-NEXT:    slli s6, s6, 3
-; RV32-NEXT:    add s6, sp, s6
-; RV32-NEXT:    addi s6, s6, 32
-; RV32-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    mv s6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, s6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 32
-; RV32-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a7
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t0
-; RV32-NEXT:    vmul.vv v6, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t1
-; RV32-NEXT:    vmul.vv v30, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t2
-; RV32-NEXT:    vmul.vv v28, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t5
-; RV32-NEXT:    vmul.vv v26, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s2
-; RV32-NEXT:    vmul.vv v22, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s5
-; RV32-NEXT:    vmul.vv v18, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s4
-; RV32-NEXT:    vmul.vv v16, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s3
-; RV32-NEXT:    vmul.vv v24, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s1
-; RV32-NEXT:    vmul.vv v20, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s0
-; RV32-NEXT:    vmul.vv v12, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    vand.vx v14, v8, t4
-; RV32-NEXT:    vmul.vv v14, v8, v14
-; RV32-NEXT:    vand.vx v0, v8, t3
-; RV32-NEXT:    vmul.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v0, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v4, v2, v4
-; RV32-NEXT:    vxor.vv v6, v4, v6
-; RV32-NEXT:    vxor.vv v30, v6, v30
-; RV32-NEXT:    vxor.vv v28, v30, v28
-; RV32-NEXT:    vxor.vv v26, v28, v26
-; RV32-NEXT:    vxor.vv v22, v26, v22
-; RV32-NEXT:    vsll.vi v26, v0, 24
-; RV32-NEXT:    vxor.vv v18, v22, v18
-; RV32-NEXT:    vxor.vv v16, v18, v16
-; RV32-NEXT:    vand.vx v18, v22, a4
-; RV32-NEXT:    vsll.vi v18, v18, 8
-; RV32-NEXT:    vor.vv v18, v26, v18
-; RV32-NEXT:    vxor.vv v16, v16, v24
-; RV32-NEXT:    vxor.vv v16, v16, v20
-; RV32-NEXT:    vxor.vv v12, v16, v12
-; RV32-NEXT:    vxor.vv v10, v12, v10
-; RV32-NEXT:    vsrl.vi v12, v22, 8
-; RV32-NEXT:    vand.vx v12, v12, a4
-; RV32-NEXT:    vxor.vv v10, v10, v14
-; RV32-NEXT:    vxor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vor.vv v8, v18, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v10, v10, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v10, v10, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v10, v10, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 64
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    sd s0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vsll.vi v14, v8, 24
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    lui s6, 349525
-; RV64-NEXT:    li t2, 16
-; RV64-NEXT:    li t5, 32
-; RV64-NEXT:    li s2, 64
-; RV64-NEXT:    li s5, 128
-; RV64-NEXT:    li s4, 256
-; RV64-NEXT:    li s3, 512
-; RV64-NEXT:    li s1, 1024
-; RV64-NEXT:    li s0, 1
-; RV64-NEXT:    lui t6, 1
-; RV64-NEXT:    lui t4, 2
-; RV64-NEXT:    lui t3, 4
-; RV64-NEXT:    lui a5, 8
-; RV64-NEXT:    lui a6, 32
-; RV64-NEXT:    lui a7, 64
-; RV64-NEXT:    lui t0, 128
-; RV64-NEXT:    lui t1, 256
-; RV64-NEXT:    addi a4, a0, -256
-; RV64-NEXT:    addi a3, a1, -241
-; RV64-NEXT:    addi a2, a2, 819
-; RV64-NEXT:    addi a1, s6, 1365
-; RV64-NEXT:    vand.vx v10, v10, a4
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v10, v10, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v10, v10, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v10, v10, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vand.vx v10, v8, t2
-; RV64-NEXT:    lui t2, 512
-; RV64-NEXT:    vand.vx v12, v8, t5
-; RV64-NEXT:    lui t5, 1024
-; RV64-NEXT:    vand.vx v14, v8, s2
-; RV64-NEXT:    lui s2, 2048
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v26, v8, s4
-; RV64-NEXT:    lui s4, 8192
-; RV64-NEXT:    vand.vx v28, v8, s3
-; RV64-NEXT:    lui s3, 16384
-; RV64-NEXT:    vand.vx v18, v8, s1
-; RV64-NEXT:    lui s1, 32768
-; RV64-NEXT:    slli s0, s0, 11
-; RV64-NEXT:    vand.vx v20, v8, s0
-; RV64-NEXT:    lui s0, 65536
-; RV64-NEXT:    vand.vx v22, v8, t6
-; RV64-NEXT:    lui t6, 131072
-; RV64-NEXT:    vand.vx v24, v8, t4
-; RV64-NEXT:    lui t4, 262144
-; RV64-NEXT:    vand.vx v30, v8, t3
-; RV64-NEXT:    lui t3, 524288
-; RV64-NEXT:    vand.vi v6, v8, 2
-; RV64-NEXT:    vand.vi v4, v8, 1
-; RV64-NEXT:    vand.vi v2, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v6, v8, v6
-; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v14
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v26
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v18
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v20
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v22
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v24
-; RV64-NEXT:    csrr s6, vlenb
-; RV64-NEXT:    slli s6, s6, 1
-; RV64-NEXT:    mv a0, s6
-; RV64-NEXT:    slli s6, s6, 2
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s6, sp, s6
-; RV64-NEXT:    addi s6, s6, 32
-; RV64-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v30
-; RV64-NEXT:    csrr s6, vlenb
-; RV64-NEXT:    slli s6, s6, 3
-; RV64-NEXT:    add s6, sp, s6
-; RV64-NEXT:    addi s6, s6, 32
-; RV64-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a6
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a7
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, t0
-; RV64-NEXT:    vmul.vv v6, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t1
-; RV64-NEXT:    vmul.vv v30, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t2
-; RV64-NEXT:    vmul.vv v28, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t5
-; RV64-NEXT:    vmul.vv v26, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s2
-; RV64-NEXT:    vmul.vv v22, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v18, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s4
-; RV64-NEXT:    vmul.vv v16, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s3
-; RV64-NEXT:    vmul.vv v24, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s1
-; RV64-NEXT:    vmul.vv v20, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s0
-; RV64-NEXT:    vmul.vv v12, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t6
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    vand.vx v14, v8, t4
-; RV64-NEXT:    vmul.vv v14, v8, v14
-; RV64-NEXT:    vand.vx v0, v8, t3
-; RV64-NEXT:    vmul.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v4, v2, v4
-; RV64-NEXT:    vxor.vv v6, v4, v6
-; RV64-NEXT:    vxor.vv v30, v6, v30
-; RV64-NEXT:    vxor.vv v28, v30, v28
-; RV64-NEXT:    vxor.vv v26, v28, v26
-; RV64-NEXT:    vxor.vv v22, v26, v22
-; RV64-NEXT:    vsll.vi v26, v0, 24
-; RV64-NEXT:    vxor.vv v18, v22, v18
-; RV64-NEXT:    vxor.vv v16, v18, v16
-; RV64-NEXT:    vand.vx v18, v22, a4
-; RV64-NEXT:    vsll.vi v18, v18, 8
-; RV64-NEXT:    vor.vv v18, v26, v18
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    vxor.vv v16, v16, v20
-; RV64-NEXT:    vxor.vv v12, v16, v12
-; RV64-NEXT:    vxor.vv v10, v12, v10
-; RV64-NEXT:    vsrl.vi v12, v22, 8
-; RV64-NEXT:    vand.vx v12, v12, a4
-; RV64-NEXT:    vxor.vv v10, v10, v14
-; RV64-NEXT:    vxor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v18, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v10, v10, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v10, v10, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v10, v10, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
-  %a = call <vscale x 4 x i32> @llvm.clmulr.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
-  ret <vscale x 4 x i32> %a
-}
-
-define <vscale x 8 x i32> @clmulr_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) nounwind {
-; RV32-LABEL: clmulr_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -80
-; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a5, 16
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vsll.vi v20, v8, 24
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    lui ra, 349525
-; RV32-NEXT:    li s9, 16
-; RV32-NEXT:    li s8, 32
-; RV32-NEXT:    li s6, 64
-; RV32-NEXT:    li a7, 512
-; RV32-NEXT:    li t0, 1024
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    lui t1, 1
-; RV32-NEXT:    lui t2, 2
-; RV32-NEXT:    lui t3, 4
-; RV32-NEXT:    lui t4, 8
-; RV32-NEXT:    lui t5, 32
-; RV32-NEXT:    lui t6, 64
-; RV32-NEXT:    lui s0, 128
-; RV32-NEXT:    lui s1, 256
-; RV32-NEXT:    lui s2, 512
-; RV32-NEXT:    lui s3, 1024
-; RV32-NEXT:    lui s4, 2048
-; RV32-NEXT:    lui s5, 4096
-; RV32-NEXT:    lui s7, 8192
-; RV32-NEXT:    lui s10, 16384
-; RV32-NEXT:    lui s11, 32768
-; RV32-NEXT:    addi a4, a5, -256
-; RV32-NEXT:    addi a3, a1, -241
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    addi a1, ra, 1365
-; RV32-NEXT:    vand.vx v12, v12, a4
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v20, v8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v12, v12, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v12, v12, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v12, v12, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vand.vx v12, v8, s9
-; RV32-NEXT:    lui s9, 65536
-; RV32-NEXT:    vand.vx v16, v8, s8
-; RV32-NEXT:    lui s8, 131072
-; RV32-NEXT:    vand.vx v20, v8, s6
-; RV32-NEXT:    lui s6, 262144
-; RV32-NEXT:    slli ra, a0, 11
-; RV32-NEXT:    vand.vi v24, v8, 2
-; RV32-NEXT:    vand.vi v28, v8, 1
-; RV32-NEXT:    vand.vi v4, v8, 4
-; RV32-NEXT:    vand.vi v0, v8, 8
-; RV32-NEXT:    vmul.vv v24, v8, v24
-; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    vand.vx v12, v8, a6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 2
-; RV32-NEXT:    mv a6, a4
-; RV32-NEXT:    slli a4, a4, 1
-; RV32-NEXT:    add a6, a6, a4
-; RV32-NEXT:    slli a4, a4, 3
-; RV32-NEXT:    add a4, a4, a6
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a6, 256
-; RV32-NEXT:    vand.vx v12, v8, a6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    mv a4, a6
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    add a6, a6, a4
-; RV32-NEXT:    lw a4, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, a7
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 4
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 6
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, ra
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 4
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, a5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 5
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a6, a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 4
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    addi a5, sp, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s7
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    vand.vx v16, v8, s10
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s11
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s9
-; RV32-NEXT:    vmul.vv v28, v8, v16
-; RV32-NEXT:    vand.vx v16, v8, s8
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    vand.vx v20, v8, s6
-; RV32-NEXT:    vmul.vv v4, v8, v20
-; RV32-NEXT:    vand.vx v20, v8, a0
-; RV32-NEXT:    vmul.vv v20, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v0, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v0, v24
-; RV32-NEXT:    vxor.vv v12, v24, v12
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vand.vx v24, v0, a4
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v12, v12, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v12, v12, v24
-; RV32-NEXT:    vxor.vv v12, v12, v28
-; RV32-NEXT:    vxor.vv v12, v12, v16
-; RV32-NEXT:    vsrl.vi v16, v0, 8
-; RV32-NEXT:    vand.vx v16, v16, a4
-; RV32-NEXT:    vxor.vv v12, v12, v4
-; RV32-NEXT:    vxor.vv v12, v12, v20
-; RV32-NEXT:    vsrl.vi v12, v12, 24
-; RV32-NEXT:    vor.vv v12, v16, v12
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v12, v12, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v12, v12, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v12, v12, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 80
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -144
-; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a5, 16
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vsll.vi v20, v8, 24
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    lui ra, 349525
-; RV64-NEXT:    li s9, 16
-; RV64-NEXT:    li s8, 32
-; RV64-NEXT:    li s6, 64
-; RV64-NEXT:    li a7, 512
-; RV64-NEXT:    li t0, 1024
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    lui t1, 1
-; RV64-NEXT:    lui t2, 2
-; RV64-NEXT:    lui t3, 4
-; RV64-NEXT:    lui t4, 8
-; RV64-NEXT:    lui t5, 32
-; RV64-NEXT:    lui t6, 64
-; RV64-NEXT:    lui s0, 128
-; RV64-NEXT:    lui s1, 256
-; RV64-NEXT:    lui s2, 512
-; RV64-NEXT:    lui s3, 1024
-; RV64-NEXT:    lui s4, 2048
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    lui s7, 8192
-; RV64-NEXT:    lui s10, 16384
-; RV64-NEXT:    lui s11, 32768
-; RV64-NEXT:    addi a4, a5, -256
-; RV64-NEXT:    addi a3, a1, -241
-; RV64-NEXT:    addi a2, a2, 819
-; RV64-NEXT:    addi a1, ra, 1365
-; RV64-NEXT:    vand.vx v12, v12, a4
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v20, v8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v12, v12, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v12, v12, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v12, v12, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vand.vx v12, v8, s9
-; RV64-NEXT:    lui s9, 65536
-; RV64-NEXT:    vand.vx v16, v8, s8
-; RV64-NEXT:    lui s8, 131072
-; RV64-NEXT:    vand.vx v20, v8, s6
-; RV64-NEXT:    lui s6, 262144
-; RV64-NEXT:    slli ra, a0, 11
-; RV64-NEXT:    vand.vi v24, v8, 2
-; RV64-NEXT:    vand.vi v28, v8, 1
-; RV64-NEXT:    vand.vi v4, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v24, v8, v24
-; RV64-NEXT:    sd a4, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v20
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a0, 524288
-; RV64-NEXT:    li a6, 128
-; RV64-NEXT:    vand.vx v12, v8, a6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 2
-; RV64-NEXT:    mv a6, a4
-; RV64-NEXT:    slli a4, a4, 1
-; RV64-NEXT:    add a6, a6, a4
-; RV64-NEXT:    slli a4, a4, 3
-; RV64-NEXT:    add a4, a4, a6
-; RV64-NEXT:    add a4, sp, a4
-; RV64-NEXT:    addi a4, a4, 32
-; RV64-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li a6, 256
-; RV64-NEXT:    vand.vx v12, v8, a6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    mv a4, a6
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    add a6, a6, a4
-; RV64-NEXT:    ld a4, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a7
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 4
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 6
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, ra
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 4
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a6, a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    addi a5, sp, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s7
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    vand.vx v16, v8, s10
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s11
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v16, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s9
-; RV64-NEXT:    vmul.vv v28, v8, v16
-; RV64-NEXT:    vand.vx v16, v8, s8
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    vand.vx v20, v8, s6
-; RV64-NEXT:    vmul.vv v4, v8, v20
-; RV64-NEXT:    vand.vx v20, v8, a0
-; RV64-NEXT:    vmul.vv v20, v8, v20
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v0, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v0, v24
-; RV64-NEXT:    vxor.vv v12, v24, v12
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vand.vx v24, v0, a4
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    vxor.vv v12, v12, v28
-; RV64-NEXT:    vxor.vv v12, v12, v16
-; RV64-NEXT:    vsrl.vi v16, v0, 8
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vxor.vv v12, v12, v4
-; RV64-NEXT:    vxor.vv v12, v12, v20
-; RV64-NEXT:    vsrl.vi v12, v12, 24
-; RV64-NEXT:    vor.vv v12, v16, v12
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v12, v12, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v12, v12, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v12, v12, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 144
-; RV64-NEXT:    ret
-  %a = call <vscale x 8 x i32> @llvm.clmulr.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %x)
-  ret <vscale x 8 x i32> %a
-}
-
-define <vscale x 16 x i32> @clmulr_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) nounwind {
-; RV32-LABEL: clmulr_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -80
-; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    lui a5, 16
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vsll.vi v0, v8, 24
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    lui ra, 349525
-; RV32-NEXT:    li t5, 16
-; RV32-NEXT:    li t2, 32
-; RV32-NEXT:    li a7, 64
-; RV32-NEXT:    li t0, 512
-; RV32-NEXT:    li t1, 1024
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    lui t3, 1
-; RV32-NEXT:    lui t4, 2
-; RV32-NEXT:    lui t6, 4
-; RV32-NEXT:    lui s0, 8
-; RV32-NEXT:    lui s1, 32
-; RV32-NEXT:    lui s2, 64
-; RV32-NEXT:    lui s3, 128
-; RV32-NEXT:    lui s4, 256
-; RV32-NEXT:    lui s5, 512
-; RV32-NEXT:    lui s6, 1024
-; RV32-NEXT:    lui s7, 2048
-; RV32-NEXT:    lui s8, 4096
-; RV32-NEXT:    lui s9, 8192
-; RV32-NEXT:    lui s10, 16384
-; RV32-NEXT:    lui s11, 32768
-; RV32-NEXT:    addi a4, a5, -256
-; RV32-NEXT:    addi a3, a1, -241
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    addi a1, ra, 1365
-; RV32-NEXT:    slli a0, a0, 11
-; RV32-NEXT:    vand.vx v16, v16, a4
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v0, v8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v16, v16, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v16, v16, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vand.vi v16, v8, 2
-; RV32-NEXT:    vand.vi v24, v8, 1
-; RV32-NEXT:    vand.vi v0, v8, 4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v24
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    vand.vi v16, v8, 8
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui ra, 65536
-; RV32-NEXT:    vand.vx v16, v8, t5
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui t5, 131072
-; RV32-NEXT:    vand.vx v16, v8, t2
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui t2, 262144
-; RV32-NEXT:    vand.vx v16, v8, a7
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    vand.vx v16, v8, a6
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a6, 256
-; RV32-NEXT:    vand.vx v16, v8, a6
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    mv a0, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 4
-; RV32-NEXT:    mv t0, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add t0, t0, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a6, a6, t0
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t1
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    mv t0, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add t0, t0, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a6, a6, t0
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t3
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a6, a6, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t6
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a6, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, a5
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s1
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s2
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s3
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s5
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s6
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s7
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s8
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s9
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s10
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s11
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, ra
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t5
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t2
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, a7
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v24, v8
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v16, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v24
-; RV32-NEXT:    vsll.vi v16, v16, 24
-; RV32-NEXT:    vand.vx v24, v8, a4
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vsrl.vi v24, v24, 24
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v16, v16, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v16, v16, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 80
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -144
-; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    lui a5, 16
-; RV64-NEXT:    vsrl.vi v24, v8, 24
-; RV64-NEXT:    vsll.vi v0, v8, 24
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    lui ra, 349525
-; RV64-NEXT:    li t5, 16
-; RV64-NEXT:    li t2, 32
-; RV64-NEXT:    li a7, 64
-; RV64-NEXT:    li t0, 512
-; RV64-NEXT:    li t1, 1024
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    lui t3, 1
-; RV64-NEXT:    lui t4, 2
-; RV64-NEXT:    lui t6, 4
-; RV64-NEXT:    lui s0, 8
-; RV64-NEXT:    lui s1, 32
-; RV64-NEXT:    lui s2, 64
-; RV64-NEXT:    lui s3, 128
-; RV64-NEXT:    lui s4, 256
-; RV64-NEXT:    lui s5, 512
-; RV64-NEXT:    lui s6, 1024
-; RV64-NEXT:    lui s7, 2048
-; RV64-NEXT:    lui s8, 4096
-; RV64-NEXT:    lui s9, 8192
-; RV64-NEXT:    lui s10, 16384
-; RV64-NEXT:    lui s11, 32768
-; RV64-NEXT:    addi a4, a5, -256
-; RV64-NEXT:    addi a3, a1, -241
-; RV64-NEXT:    addi a2, a2, 819
-; RV64-NEXT:    addi a1, ra, 1365
-; RV64-NEXT:    slli a0, a0, 11
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v0, v8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v16, v16, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v16, v16, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vand.vi v16, v8, 2
-; RV64-NEXT:    vand.vi v24, v8, 1
-; RV64-NEXT:    vand.vi v0, v8, 4
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v24
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vand.vi v16, v8, 8
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui ra, 65536
-; RV64-NEXT:    vand.vx v16, v8, t5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui t5, 131072
-; RV64-NEXT:    vand.vx v16, v8, t2
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui t2, 262144
-; RV64-NEXT:    vand.vx v16, v8, a7
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui a7, 524288
-; RV64-NEXT:    li a6, 128
-; RV64-NEXT:    vand.vx v16, v8, a6
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li a6, 256
-; RV64-NEXT:    vand.vx v16, v8, a6
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    mv a0, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t0
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 4
-; RV64-NEXT:    mv t0, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add t0, t0, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a6, a6, t0
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t1
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    mv t0, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add t0, t0, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a6, a6, t0
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs8r.v v16, (a6) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t3
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a6, a6, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t4
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t6
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a6, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s0
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 7
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s1
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s2
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s3
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s4
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s6
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s7
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s8
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s9
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s10
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s11
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, ra
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t2
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a7
-; RV64-NEXT:    vmul.vv v8, v8, v16
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v24, v8
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v16, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 7
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    vsll.vi v16, v16, 24
-; RV64-NEXT:    vand.vx v24, v8, a4
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vsrl.vi v24, v24, 24
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v16, v16, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v16, v16, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 144
-; RV64-NEXT:    ret
-  %a = call <vscale x 16 x i32> @llvm.clmulr.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
-  ret <vscale x 16 x i32> %a
-}
-
-define <vscale x 1 x i64> @clmulr_nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_nxv1i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s7, 1044480
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    li s11, 1
-; RV32-NEXT:    li s8, 2
-; RV32-NEXT:    li s9, 4
-; RV32-NEXT:    li s10, 8
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    li a4, 32
-; RV32-NEXT:    li a5, 64
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    li ra, 256
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    li a1, 1024
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    lui t0, 2
-; RV32-NEXT:    lui t1, 4
-; RV32-NEXT:    lui t2, 8
-; RV32-NEXT:    lui t3, 16
-; RV32-NEXT:    lui t4, 32
-; RV32-NEXT:    lui t5, 64
-; RV32-NEXT:    lui t6, 128
-; RV32-NEXT:    lui s0, 256
-; RV32-NEXT:    lui s1, 512
-; RV32-NEXT:    lui s2, 1024
-; RV32-NEXT:    lui s3, 2048
-; RV32-NEXT:    lui s4, 4096
-; RV32-NEXT:    lui s5, 8192
-; RV32-NEXT:    lui s6, 16384
-; RV32-NEXT:    sw s7, 272(sp)
-; RV32-NEXT:    lui s7, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw a7, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw s11, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw s8, 252(sp)
-; RV32-NEXT:    lui s8, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw s9, 244(sp)
-; RV32-NEXT:    lui s9, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw s10, 236(sp)
-; RV32-NEXT:    lui s10, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw a3, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw a4, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw a5, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw a6, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw ra, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw a0, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a1, 180(sp)
-; RV32-NEXT:    slli s11, s11, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw s11, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a2, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw t0, 156(sp)
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw t1, 148(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw t2, 140(sp)
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw t3, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw t4, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw t5, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw t6, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s0, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s1, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s2, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s3, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s4, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s5, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s6, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s7, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw s8, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw s9, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw s10, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw a7, 12(sp)
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v3, a0
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vmv.v.x v2, a0
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vmv.v.x v1, a0
-; RV32-NEXT:    addi a0, sp, 272
-; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a0), zero
-; RV32-NEXT:    addi a0, sp, 264
-; RV32-NEXT:    vlse64.v v13, (a0), zero
-; RV32-NEXT:    addi a0, sp, 256
-; RV32-NEXT:    vlse64.v v14, (a0), zero
-; RV32-NEXT:    addi a0, sp, 248
-; RV32-NEXT:    vlse64.v v15, (a0), zero
-; RV32-NEXT:    addi a0, sp, 240
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v17, (a0), zero
-; RV32-NEXT:    addi a0, sp, 224
-; RV32-NEXT:    vlse64.v v18, (a0), zero
-; RV32-NEXT:    addi a0, sp, 216
-; RV32-NEXT:    vlse64.v v19, (a0), zero
-; RV32-NEXT:    addi a0, sp, 208
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    addi a0, sp, 200
-; RV32-NEXT:    vlse64.v v21, (a0), zero
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vlse64.v v22, (a0), zero
-; RV32-NEXT:    addi a0, sp, 184
-; RV32-NEXT:    vlse64.v v23, (a0), zero
-; RV32-NEXT:    addi a0, sp, 176
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    addi a0, sp, 168
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    addi a0, sp, 152
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    addi a0, sp, 144
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    addi a0, sp, 136
-; RV32-NEXT:    vlse64.v v29, (a0), zero
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    vlse64.v v30, (a0), zero
-; RV32-NEXT:    addi a0, sp, 120
-; RV32-NEXT:    vlse64.v v31, (a0), zero
-; RV32-NEXT:    addi a0, sp, 112
-; RV32-NEXT:    vlse64.v v11, (a0), zero
-; RV32-NEXT:    addi a0, sp, 104
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    vlse64.v v5, (a0), zero
-; RV32-NEXT:    addi a0, sp, 88
-; RV32-NEXT:    vlse64.v v4, (a0), zero
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    vsrl.vi v27, v8, 24
-; RV32-NEXT:    vsrl.vx v28, v8, a6
-; RV32-NEXT:    li ra, 40
-; RV32-NEXT:    vsrl.vx v7, v8, ra
-; RV32-NEXT:    vsll.vx v6, v8, a6
-; RV32-NEXT:    addi a4, t3, -256
-; RV32-NEXT:    vand.vx v7, v7, a4
-; RV32-NEXT:    vor.vv v28, v7, v28
-; RV32-NEXT:    vand.vx v7, v8, a4
-; RV32-NEXT:    vsll.vx v7, v7, ra
-; RV32-NEXT:    vor.vv v7, v6, v7
-; RV32-NEXT:    vsrl.vi v6, v8, 8
-; RV32-NEXT:    lui a5, 4080
-; RV32-NEXT:    vand.vx v27, v27, a5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v6, v6, v0
-; RV32-NEXT:    vor.vv v27, v6, v27
-; RV32-NEXT:    addi a3, sp, 80
-; RV32-NEXT:    vlse64.v v6, (a3), zero
-; RV32-NEXT:    vor.vv v27, v27, v28
-; RV32-NEXT:    vand.vx v28, v8, a5
-; RV32-NEXT:    vsll.vi v28, v28, 24
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v28, v8
-; RV32-NEXT:    addi a3, sp, 72
-; RV32-NEXT:    vlse64.v v28, (a3), zero
-; RV32-NEXT:    vor.vv v8, v7, v8
-; RV32-NEXT:    addi a3, sp, 64
-; RV32-NEXT:    vlse64.v v7, (a3), zero
-; RV32-NEXT:    vor.vv v8, v8, v27
-; RV32-NEXT:    vsrl.vi v27, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v3
-; RV32-NEXT:    vand.vv v27, v27, v3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    vsrl.vi v27, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v2
-; RV32-NEXT:    vand.vv v27, v27, v2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    vsrl.vi v27, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v1
-; RV32-NEXT:    vand.vv v27, v27, v1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    addi a3, sp, 56
-; RV32-NEXT:    vlse64.v v27, (a3), zero
-; RV32-NEXT:    vand.vv v13, v8, v13
-; RV32-NEXT:    vand.vv v14, v8, v14
-; RV32-NEXT:    vand.vv v15, v8, v15
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    vand.vv v17, v8, v17
-; RV32-NEXT:    vand.vv v18, v8, v18
-; RV32-NEXT:    vand.vv v19, v8, v19
-; RV32-NEXT:    vand.vv v20, v8, v20
-; RV32-NEXT:    vand.vv v21, v8, v21
-; RV32-NEXT:    vand.vv v22, v8, v22
-; RV32-NEXT:    vand.vv v23, v8, v23
-; RV32-NEXT:    vand.vv v24, v8, v24
-; RV32-NEXT:    vand.vv v25, v8, v25
-; RV32-NEXT:    vand.vv v26, v8, v26
-; RV32-NEXT:    vand.vv v3, v8, v9
-; RV32-NEXT:    vand.vv v2, v8, v10
-; RV32-NEXT:    vand.vv v29, v8, v29
-; RV32-NEXT:    vand.vv v30, v8, v30
-; RV32-NEXT:    vand.vv v31, v8, v31
-; RV32-NEXT:    vand.vv v0, v8, v11
-; RV32-NEXT:    vand.vv v9, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v5, v8, v5
-; RV32-NEXT:    vand.vv v4, v8, v4
-; RV32-NEXT:    vand.vv v6, v8, v6
-; RV32-NEXT:    vand.vv v9, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    addi a0, sp, 40
-; RV32-NEXT:    vlse64.v v9, (a3), zero
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vand.vv v11, v8, v7
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v11, v8, v27
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a2, sp, 32
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a2), zero
-; RV32-NEXT:    vlse64.v v10, (a3), zero
-; RV32-NEXT:    vlse64.v v11, (a1), zero
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vand.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v11
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 8
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s11
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t3
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t5
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t6
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s3
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s5
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 2
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s6
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s7
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s8
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s9
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v1, v8, s10
-; RV32-NEXT:    vmul.vv v1, v8, v1
-; RV32-NEXT:    vmul.vv v9, v8, v13
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v14
-; RV32-NEXT:    vmul.vv v11, v8, v15
-; RV32-NEXT:    vmul.vv v12, v8, v16
-; RV32-NEXT:    vmul.vv v13, v8, v17
-; RV32-NEXT:    vmul.vv v14, v8, v18
-; RV32-NEXT:    vmul.vv v15, v8, v19
-; RV32-NEXT:    vmul.vv v16, v8, v20
-; RV32-NEXT:    vmul.vv v17, v8, v21
-; RV32-NEXT:    vmul.vv v18, v8, v22
-; RV32-NEXT:    vmul.vv v19, v8, v23
-; RV32-NEXT:    vmul.vv v20, v8, v24
-; RV32-NEXT:    vmul.vv v21, v8, v25
-; RV32-NEXT:    vmul.vv v22, v8, v26
-; RV32-NEXT:    vmul.vv v23, v8, v3
-; RV32-NEXT:    vmul.vv v24, v8, v2
-; RV32-NEXT:    vmul.vv v25, v8, v29
-; RV32-NEXT:    vmul.vv v26, v8, v30
-; RV32-NEXT:    vmul.vv v27, v8, v31
-; RV32-NEXT:    vmul.vv v28, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v29, v8, v29
-; RV32-NEXT:    vmul.vv v30, v8, v5
-; RV32-NEXT:    vmul.vv v31, v8, v4
-; RV32-NEXT:    vmul.vv v7, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v5, v8, v5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v3, v8, v3
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v2, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 2
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vxor.vv v8, v8, v1
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vxor.vv v8, v8, v11
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v13
-; RV32-NEXT:    vxor.vv v8, v8, v14
-; RV32-NEXT:    vxor.vv v8, v8, v15
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v17
-; RV32-NEXT:    vxor.vv v8, v8, v18
-; RV32-NEXT:    vxor.vv v8, v8, v19
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v21
-; RV32-NEXT:    vxor.vv v8, v8, v22
-; RV32-NEXT:    vxor.vv v8, v8, v23
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v25
-; RV32-NEXT:    vxor.vv v8, v8, v26
-; RV32-NEXT:    vxor.vv v8, v8, v27
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v29
-; RV32-NEXT:    vxor.vv v8, v8, v30
-; RV32-NEXT:    vxor.vv v8, v8, v31
-; RV32-NEXT:    vxor.vv v8, v8, v7
-; RV32-NEXT:    vxor.vv v8, v8, v6
-; RV32-NEXT:    vxor.vv v8, v8, v5
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v3
-; RV32-NEXT:    vxor.vv v8, v8, v2
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vx v9, v8, a6
-; RV32-NEXT:    vsll.vx v10, v8, a6
-; RV32-NEXT:    vsrl.vx v11, v8, ra
-; RV32-NEXT:    vand.vx v12, v8, a4
-; RV32-NEXT:    vand.vx v11, v11, a4
-; RV32-NEXT:    vsrl.vi v13, v8, 24
-; RV32-NEXT:    vand.vx v14, v8, a5
-; RV32-NEXT:    vand.vx v13, v13, a5
-; RV32-NEXT:    vsll.vx v12, v12, ra
-; RV32-NEXT:    vsrl.vi v15, v8, 8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v15, v15, v16
-; RV32-NEXT:    vor.vv v9, v11, v9
-; RV32-NEXT:    vor.vv v11, v15, v13
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vsll.vi v13, v14, 24
-; RV32-NEXT:    vor.vv v8, v13, v8
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vor.vv v9, v11, v9
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv1i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -224
-; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    li t2, 255
-; RV64-NEXT:    lui t6, 61681
-; RV64-NEXT:    lui s0, 209715
-; RV64-NEXT:    lui s1, 349525
-; RV64-NEXT:    li s10, 16
-; RV64-NEXT:    li s9, 32
-; RV64-NEXT:    li s8, 64
-; RV64-NEXT:    li s5, 128
-; RV64-NEXT:    li s6, 256
-; RV64-NEXT:    li t5, 512
-; RV64-NEXT:    li t3, 1024
-; RV64-NEXT:    li t0, 1
-; RV64-NEXT:    lui s7, 1
-; RV64-NEXT:    lui a1, 2
-; RV64-NEXT:    lui t4, 4
-; RV64-NEXT:    lui t1, 8
-; RV64-NEXT:    lui a7, 32
-; RV64-NEXT:    lui a6, 64
-; RV64-NEXT:    lui a5, 128
-; RV64-NEXT:    lui a4, 256
-; RV64-NEXT:    lui a3, 512
-; RV64-NEXT:    lui a2, 1024
-; RV64-NEXT:    li s11, 56
-; RV64-NEXT:    vsrl.vx v11, v8, s11
-; RV64-NEXT:    li ra, 40
-; RV64-NEXT:    vsrl.vx v12, v8, ra
-; RV64-NEXT:    addi t6, t6, -241
-; RV64-NEXT:    addi s2, s0, 819
-; RV64-NEXT:    addi s3, s1, 1365
-; RV64-NEXT:    slli s1, t6, 32
-; RV64-NEXT:    add s4, t6, s1
-; RV64-NEXT:    slli t6, s2, 32
-; RV64-NEXT:    add s2, s2, t6
-; RV64-NEXT:    slli t6, s3, 32
-; RV64-NEXT:    add s3, s3, t6
-; RV64-NEXT:    lui s0, 16
-; RV64-NEXT:    addi s1, s0, -256
-; RV64-NEXT:    lui a0, 4080
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    slli t6, t2, 24
-; RV64-NEXT:    vand.vx v13, v8, a0
-; RV64-NEXT:    vsll.vx v14, v8, s11
-; RV64-NEXT:    vand.vx v12, v12, s1
-; RV64-NEXT:    vand.vx v9, v9, t6
-; RV64-NEXT:    vsll.vi v13, v13, 24
-; RV64-NEXT:    vand.vx v15, v8, t6
-; RV64-NEXT:    vand.vx v8, v8, s1
-; RV64-NEXT:    vor.vv v11, v12, v11
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vsll.vi v10, v15, 8
-; RV64-NEXT:    vsll.vx v8, v8, ra
-; RV64-NEXT:    vor.vv v9, v9, v11
-; RV64-NEXT:    vor.vv v10, v13, v10
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vand.vx v9, v9, s4
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vand.vx v9, v9, s2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, s3
-; RV64-NEXT:    vand.vx v9, v9, s3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vand.vx v7, v8, s10
-; RV64-NEXT:    lui t2, 4096
-; RV64-NEXT:    vand.vx v6, v8, s9
-; RV64-NEXT:    lui s9, 8192
-; RV64-NEXT:    vand.vx v5, v8, s8
-; RV64-NEXT:    lui s8, 16384
-; RV64-NEXT:    vand.vx v4, v8, s5
-; RV64-NEXT:    lui s10, 32768
-; RV64-NEXT:    vand.vx v13, v8, s6
-; RV64-NEXT:    lui s11, 65536
-; RV64-NEXT:    vand.vx v14, v8, t5
-; RV64-NEXT:    lui t5, 131072
-; RV64-NEXT:    vand.vx v15, v8, t3
-; RV64-NEXT:    slli t3, t0, 11
-; RV64-NEXT:    vand.vx v16, v8, t3
-; RV64-NEXT:    lui t3, 262144
-; RV64-NEXT:    vand.vx v17, v8, s7
-; RV64-NEXT:    slli a0, t0, 31
-; RV64-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v18, v8, a1
-; RV64-NEXT:    slli a0, t0, 32
-; RV64-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v19, v8, t4
-; RV64-NEXT:    slli a0, t0, 33
-; RV64-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v20, v8, t1
-; RV64-NEXT:    slli a0, t0, 34
-; RV64-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v21, v8, s0
-; RV64-NEXT:    slli a0, t0, 35
-; RV64-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v22, v8, a7
-; RV64-NEXT:    slli a0, t0, 36
-; RV64-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v23, v8, a6
-; RV64-NEXT:    slli a0, t0, 37
-; RV64-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v24, v8, a5
-; RV64-NEXT:    slli a0, t0, 38
-; RV64-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v25, v8, a4
-; RV64-NEXT:    slli a0, t0, 39
-; RV64-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v26, v8, a3
-; RV64-NEXT:    slli a0, t0, 40
-; RV64-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v27, v8, a2
-; RV64-NEXT:    slli a0, t0, 41
-; RV64-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    lui a0, 2048
-; RV64-NEXT:    vand.vx v28, v8, a0
-; RV64-NEXT:    slli s5, t0, 42
-; RV64-NEXT:    vand.vx v29, v8, t2
-; RV64-NEXT:    slli s6, t0, 43
-; RV64-NEXT:    vand.vx v30, v8, s9
-; RV64-NEXT:    slli s7, t0, 44
-; RV64-NEXT:    vand.vx v10, v8, s8
-; RV64-NEXT:    slli s8, t0, 45
-; RV64-NEXT:    vand.vx v11, v8, s10
-; RV64-NEXT:    slli s9, t0, 46
-; RV64-NEXT:    vand.vx v12, v8, s11
-; RV64-NEXT:    slli s10, t0, 47
-; RV64-NEXT:    vand.vx v9, v8, t5
-; RV64-NEXT:    slli s11, t0, 48
-; RV64-NEXT:    vand.vx v31, v8, t3
-; RV64-NEXT:    slli ra, t0, 49
-; RV64-NEXT:    slli t5, t0, 50
-; RV64-NEXT:    slli t4, t0, 51
-; RV64-NEXT:    slli t3, t0, 52
-; RV64-NEXT:    slli t2, t0, 53
-; RV64-NEXT:    slli t1, t0, 54
-; RV64-NEXT:    slli a7, t0, 55
-; RV64-NEXT:    slli a6, t0, 56
-; RV64-NEXT:    slli a5, t0, 57
-; RV64-NEXT:    slli a4, t0, 58
-; RV64-NEXT:    slli a3, t0, 59
-; RV64-NEXT:    slli a2, t0, 60
-; RV64-NEXT:    slli a1, t0, 61
-; RV64-NEXT:    slli t0, t0, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v3, v8, 2
-; RV64-NEXT:    vand.vi v2, v8, 1
-; RV64-NEXT:    vand.vi v1, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v3, v8, v3
-; RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v3, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v3, v8, v2
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v3, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v3, v8, v1
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v3, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vmul.vv v7, v8, v7
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v7, v8, v6
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v7, v8, v5
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v7, v8, v4
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v7, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v13
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v14
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v15
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 4
-; RV64-NEXT:    add t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v16
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v17
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 4
-; RV64-NEXT:    sub t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v18
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v19
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v20
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v21
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v22
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v23
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 3
-; RV64-NEXT:    add t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v24
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v25
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 3
-; RV64-NEXT:    sub t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v26
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v27
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 2
-; RV64-NEXT:    add t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v28
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v29
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 1
-; RV64-NEXT:    add t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v13, v8, v30
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v13, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 4
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v10, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v11
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 5
-; RV64-NEXT:    add t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v10, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v12
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 5
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v10, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s0, t6, 5
-; RV64-NEXT:    sub t6, s0, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v31
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr s0, vlenb
-; RV64-NEXT:    slli s0, s0, 1
-; RV64-NEXT:    mv t6, s0
-; RV64-NEXT:    slli s0, s0, 2
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    slli s0, s0, 1
-; RV64-NEXT:    add s0, s0, t6
-; RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s0, sp, s0
-; RV64-NEXT:    addi s0, s0, 112
-; RV64-NEXT:    vs1r.v v9, (s0) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s0, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr s0, vlenb
-; RV64-NEXT:    add s0, sp, s0
-; RV64-NEXT:    addi s0, s0, 112
-; RV64-NEXT:    vs1r.v v9, (s0) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    addi s0, sp, 112
-; RV64-NEXT:    vs1r.v v9, (s0) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s0, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v4, v8, v9
-; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v5, v8, v9
-; RV64-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v6, v8, v9
-; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v7, v8, v9
-; RV64-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v31, v8, v9
-; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s0
-; RV64-NEXT:    vmul.vv v30, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s5
-; RV64-NEXT:    vmul.vv v29, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s6
-; RV64-NEXT:    vmul.vv v28, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s7
-; RV64-NEXT:    vmul.vv v27, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s8
-; RV64-NEXT:    vmul.vv v26, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s9
-; RV64-NEXT:    vmul.vv v25, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s10
-; RV64-NEXT:    vmul.vv v23, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s11
-; RV64-NEXT:    vmul.vv v19, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, ra
-; RV64-NEXT:    vmul.vv v14, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t5
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    vand.vx v10, v8, t4
-; RV64-NEXT:    vmul.vv v24, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t3
-; RV64-NEXT:    vmul.vv v22, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t2
-; RV64-NEXT:    vmul.vv v20, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t1
-; RV64-NEXT:    vmul.vv v15, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, a7
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    vand.vx v11, v8, a6
-; RV64-NEXT:    vmul.vv v16, v8, v11
-; RV64-NEXT:    vand.vx v11, v8, a5
-; RV64-NEXT:    vmul.vv v11, v8, v11
-; RV64-NEXT:    vand.vx v12, v8, a4
-; RV64-NEXT:    vmul.vv v21, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, a3
-; RV64-NEXT:    vmul.vv v17, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    vand.vx v13, v8, a1
-; RV64-NEXT:    vmul.vv v18, v8, v13
-; RV64-NEXT:    vand.vx v13, v8, t0
-; RV64-NEXT:    vmul.vv v13, v8, v13
-; RV64-NEXT:    vand.vx v2, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v1
-; RV64-NEXT:    vxor.vv v2, v2, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 4
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 4
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 3
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 3
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 2
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 1
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v1, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v3
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsll.vx v2, v2, a0
-; RV64-NEXT:    vand.vx v1, v1, s1
-; RV64-NEXT:    li a1, 40
-; RV64-NEXT:    vsll.vx v1, v1, a1
-; RV64-NEXT:    vor.vv v2, v2, v1
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v1, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v0, v1
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a3, a2, 5
-; RV64-NEXT:    add a2, a3, a2
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a3, a2, 5
-; RV64-NEXT:    sub a2, a3, a2
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v0, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl1r.v v3, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v3
-; RV64-NEXT:    addi a2, sp, 112
-; RV64-NEXT:    vl1r.v v3, (a2) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v3, v1, v3
-; RV64-NEXT:    vxor.vv v4, v3, v4
-; RV64-NEXT:    vxor.vv v5, v4, v5
-; RV64-NEXT:    vxor.vv v6, v5, v6
-; RV64-NEXT:    vxor.vv v7, v6, v7
-; RV64-NEXT:    vxor.vv v31, v7, v31
-; RV64-NEXT:    vxor.vv v30, v31, v30
-; RV64-NEXT:    vxor.vv v29, v30, v29
-; RV64-NEXT:    vxor.vv v28, v29, v28
-; RV64-NEXT:    vxor.vv v27, v28, v27
-; RV64-NEXT:    vxor.vv v26, v27, v26
-; RV64-NEXT:    vxor.vv v25, v26, v25
-; RV64-NEXT:    vxor.vv v23, v25, v23
-; RV64-NEXT:    vxor.vv v19, v23, v19
-; RV64-NEXT:    vxor.vv v14, v19, v14
-; RV64-NEXT:    vxor.vv v9, v14, v9
-; RV64-NEXT:    vsrl.vi v14, v7, 8
-; RV64-NEXT:    vand.vx v14, v14, t6
-; RV64-NEXT:    vsrl.vi v19, v23, 24
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v19, v19, a2
-; RV64-NEXT:    vor.vv v14, v14, v19
-; RV64-NEXT:    vxor.vv v9, v9, v24
-; RV64-NEXT:    vxor.vv v9, v9, v22
-; RV64-NEXT:    vxor.vv v9, v9, v20
-; RV64-NEXT:    vxor.vv v9, v9, v15
-; RV64-NEXT:    vxor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v7, a2
-; RV64-NEXT:    vsll.vi v10, v10, 24
-; RV64-NEXT:    vxor.vv v15, v9, v16
-; RV64-NEXT:    vxor.vv v11, v15, v11
-; RV64-NEXT:    vand.vx v15, v9, t6
-; RV64-NEXT:    vsll.vi v15, v15, 8
-; RV64-NEXT:    vor.vv v10, v10, v15
-; RV64-NEXT:    vxor.vv v11, v11, v21
-; RV64-NEXT:    vor.vv v10, v2, v10
-; RV64-NEXT:    vxor.vv v11, v11, v17
-; RV64-NEXT:    vxor.vv v11, v11, v12
-; RV64-NEXT:    vsrl.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v9, v9, s1
-; RV64-NEXT:    vxor.vv v11, v11, v18
-; RV64-NEXT:    vxor.vv v11, v11, v13
-; RV64-NEXT:    vxor.vv v8, v11, v8
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vand.vx v9, v9, s4
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vand.vx v9, v9, s2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, s3
-; RV64-NEXT:    vand.vx v9, v9, s3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 224
-; RV64-NEXT:    ret
-  %a = call <vscale x 1 x i64> @llvm.clmulr.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y)
-  ret <vscale x 1 x i64> %a
-}
-
-define <vscale x 2 x i64> @clmulr_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_nxv2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s7, 1044480
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    li s8, 2
-; RV32-NEXT:    li s9, 4
-; RV32-NEXT:    li s10, 8
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    li a4, 32
-; RV32-NEXT:    li a5, 64
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    li s11, 256
-; RV32-NEXT:    li ra, 512
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    lui t0, 2
-; RV32-NEXT:    lui t1, 4
-; RV32-NEXT:    lui t2, 8
-; RV32-NEXT:    lui t3, 16
-; RV32-NEXT:    lui t4, 32
-; RV32-NEXT:    lui t5, 64
-; RV32-NEXT:    lui t6, 128
-; RV32-NEXT:    lui s0, 256
-; RV32-NEXT:    lui s1, 512
-; RV32-NEXT:    lui s2, 1024
-; RV32-NEXT:    lui s3, 2048
-; RV32-NEXT:    lui s4, 4096
-; RV32-NEXT:    lui s5, 8192
-; RV32-NEXT:    lui s6, 16384
-; RV32-NEXT:    sw s7, 272(sp)
-; RV32-NEXT:    lui s7, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw a7, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw a1, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw s8, 252(sp)
-; RV32-NEXT:    lui s8, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw s9, 244(sp)
-; RV32-NEXT:    lui s9, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw s10, 236(sp)
-; RV32-NEXT:    lui s10, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw a3, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw a4, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw a5, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw a6, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw s11, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw ra, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a0, 180(sp)
-; RV32-NEXT:    slli a5, a1, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw a5, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a2, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw t0, 156(sp)
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw t1, 148(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw t2, 140(sp)
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw t3, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw t4, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw t5, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw t6, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s0, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s1, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s2, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s3, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s4, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s5, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s6, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s7, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw s8, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw s9, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw s10, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw a7, 12(sp)
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v4, a0
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vmv.v.x v2, a0
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vmv.v.x v0, a0
-; RV32-NEXT:    addi a0, sp, 272
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v6, (a0), zero
-; RV32-NEXT:    addi a0, sp, 264
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 256
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    addi a0, sp, 248
-; RV32-NEXT:    vlse64.v v14, (a0), zero
-; RV32-NEXT:    addi a0, sp, 240
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v18, (a0), zero
-; RV32-NEXT:    addi a0, sp, 224
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    addi a0, sp, 216
-; RV32-NEXT:    vlse64.v v22, (a0), zero
-; RV32-NEXT:    li ra, 56
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vsrl.vx v26, v8, ra
-; RV32-NEXT:    li s11, 40
-; RV32-NEXT:    vsrl.vx v28, v8, s11
-; RV32-NEXT:    vsll.vx v30, v8, ra
-; RV32-NEXT:    addi a4, t3, -256
-; RV32-NEXT:    vand.vx v28, v28, a4
-; RV32-NEXT:    vor.vv v26, v28, v26
-; RV32-NEXT:    vand.vx v28, v8, a4
-; RV32-NEXT:    vsll.vx v28, v28, s11
-; RV32-NEXT:    vor.vv v30, v30, v28
-; RV32-NEXT:    vsrl.vi v28, v8, 8
-; RV32-NEXT:    lui a6, 4080
-; RV32-NEXT:    vand.vx v24, v24, a6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v28, v28, v6
-; RV32-NEXT:    vor.vv v28, v28, v24
-; RV32-NEXT:    addi a3, sp, 208
-; RV32-NEXT:    vlse64.v v24, (a3), zero
-; RV32-NEXT:    vor.vv v10, v28, v26
-; RV32-NEXT:    vand.vx v26, v8, a6
-; RV32-NEXT:    vsll.vi v26, v26, 24
-; RV32-NEXT:    vand.vv v8, v8, v6
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v26, v8
-; RV32-NEXT:    addi a3, sp, 200
-; RV32-NEXT:    vlse64.v v28, (a3), zero
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    addi a3, sp, 192
-; RV32-NEXT:    vlse64.v v26, (a3), zero
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v30, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v4
-; RV32-NEXT:    vand.vv v30, v30, v4
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    vsrl.vi v30, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v2, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v2
-; RV32-NEXT:    vand.vv v30, v30, v2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    vsrl.vi v30, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v0, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vand.vv v30, v30, v0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    addi a3, sp, 184
-; RV32-NEXT:    vlse64.v v30, (a3), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v6, v8, v10
-; RV32-NEXT:    vand.vv v4, v8, v12
-; RV32-NEXT:    vand.vv v2, v8, v14
-; RV32-NEXT:    vand.vv v0, v8, v16
-; RV32-NEXT:    vand.vv v10, v8, v18
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v22
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v28, v8, v28
-; RV32-NEXT:    addi a3, sp, 176
-; RV32-NEXT:    addi a0, sp, 168
-; RV32-NEXT:    vlse64.v v10, (a3), zero
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vand.vv v14, v8, v26
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v14, v8, v30
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a2, sp, 160
-; RV32-NEXT:    addi a3, sp, 152
-; RV32-NEXT:    addi a1, sp, 144
-; RV32-NEXT:    addi a0, sp, 136
-; RV32-NEXT:    vlse64.v v10, (a2), zero
-; RV32-NEXT:    vlse64.v v12, (a3), zero
-; RV32-NEXT:    vlse64.v v14, (a1), zero
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    addi a1, sp, 120
-; RV32-NEXT:    addi a2, sp, 112
-; RV32-NEXT:    addi a3, sp, 104
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    addi a1, sp, 88
-; RV32-NEXT:    addi a2, sp, 80
-; RV32-NEXT:    addi a3, sp, 72
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    addi a1, sp, 56
-; RV32-NEXT:    addi a2, sp, 48
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    addi a3, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 2
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 1
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 4
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 8
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t1
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t2
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t3
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t4
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s1
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s2
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s3
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s4
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s7
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s8
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s9
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s10
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v6
-; RV32-NEXT:    vmul.vv v14, v8, v4
-; RV32-NEXT:    vmul.vv v16, v8, v2
-; RV32-NEXT:    vmul.vv v18, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v20, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v20, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v22, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v22, v8, v22
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v24, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v26, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v26, v8, v26
-; RV32-NEXT:    vmul.vv v28, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v30, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v30, v8, v30
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v2, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v14
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v18
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v22
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v26
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v30
-; RV32-NEXT:    vxor.vv v8, v8, v6
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v2
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vx v10, v8, ra
-; RV32-NEXT:    vsll.vx v12, v8, ra
-; RV32-NEXT:    vsrl.vx v14, v8, s11
-; RV32-NEXT:    vand.vx v16, v8, a4
-; RV32-NEXT:    vand.vx v14, v14, a4
-; RV32-NEXT:    vsrl.vi v18, v8, 24
-; RV32-NEXT:    vand.vx v20, v8, a6
-; RV32-NEXT:    vand.vx v18, v18, a6
-; RV32-NEXT:    vsll.vx v16, v16, s11
-; RV32-NEXT:    vsrl.vi v22, v8, 8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v22, v22, v24
-; RV32-NEXT:    vor.vv v10, v14, v10
-; RV32-NEXT:    vor.vv v14, v22, v18
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vsll.vi v18, v20, 24
-; RV32-NEXT:    vor.vv v8, v18, v8
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vor.vv v10, v14, v10
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -224
-; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li s3, 40
-; RV64-NEXT:    lui s1, 16
-; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v14, v8, 24
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    li t4, 255
-; RV64-NEXT:    lui a5, 61681
-; RV64-NEXT:    lui a6, 209715
-; RV64-NEXT:    lui t6, 349525
-; RV64-NEXT:    li t5, 16
-; RV64-NEXT:    li t3, 32
-; RV64-NEXT:    li t2, 64
-; RV64-NEXT:    li t0, 128
-; RV64-NEXT:    li t1, 256
-; RV64-NEXT:    li a4, 512
-; RV64-NEXT:    li a3, 1024
-; RV64-NEXT:    li s0, 1
-; RV64-NEXT:    lui a2, 1
-; RV64-NEXT:    lui a1, 2
-; RV64-NEXT:    lui a0, 4
-; RV64-NEXT:    li a7, 56
-; RV64-NEXT:    vsrl.vx v12, v8, a7
-; RV64-NEXT:    vsrl.vx v18, v8, s3
-; RV64-NEXT:    addi s2, s1, -256
-; RV64-NEXT:    lui s1, 4080
-; RV64-NEXT:    vand.vx v16, v14, s1
-; RV64-NEXT:    slli t4, t4, 24
-; RV64-NEXT:    vand.vx v20, v8, s1
-; RV64-NEXT:    vsll.vx v14, v8, a7
-; RV64-NEXT:    addi a7, a5, -241
-; RV64-NEXT:    addi a6, a6, 819
-; RV64-NEXT:    addi a5, t6, 1365
-; RV64-NEXT:    slli t6, s0, 11
-; RV64-NEXT:    slli s1, s0, 31
-; RV64-NEXT:    sd s1, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s1, s0, 32
-; RV64-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s1, s0, 33
-; RV64-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s1, s0, 34
-; RV64-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s1, s0, 35
-; RV64-NEXT:    sd s1, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s1, s0, 36
-; RV64-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s1, a7, 32
-; RV64-NEXT:    add a7, a7, s1
-; RV64-NEXT:    slli s1, a6, 32
-; RV64-NEXT:    add a6, a6, s1
-; RV64-NEXT:    slli s1, a5, 32
-; RV64-NEXT:    add a5, a5, s1
-; RV64-NEXT:    slli s1, s0, 37
-; RV64-NEXT:    sd s1, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v18, v18, s2
-; RV64-NEXT:    vand.vx v10, v10, t4
-; RV64-NEXT:    vsll.vi v20, v20, 24
-; RV64-NEXT:    vand.vx v22, v8, t4
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vor.vv v12, v18, v12
-; RV64-NEXT:    vor.vv v10, v10, v16
-; RV64-NEXT:    vsll.vi v16, v22, 8
-; RV64-NEXT:    vsll.vx v8, v8, s3
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vor.vv v12, v20, v16
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v10, v10, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v10, v10, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v10, v10, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vand.vx v10, v8, t5
-; RV64-NEXT:    slli t5, s0, 38
-; RV64-NEXT:    sd t5, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    slli t3, s0, 39
-; RV64-NEXT:    sd t3, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v14, v8, t2
-; RV64-NEXT:    slli t2, s0, 40
-; RV64-NEXT:    sd t2, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v24, v8, t0
-; RV64-NEXT:    slli t0, s0, 41
-; RV64-NEXT:    sd t0, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t1
-; RV64-NEXT:    slli s6, s0, 42
-; RV64-NEXT:    vand.vx v18, v8, a4
-; RV64-NEXT:    slli s7, s0, 43
-; RV64-NEXT:    vand.vx v20, v8, a3
-; RV64-NEXT:    slli s8, s0, 44
-; RV64-NEXT:    vand.vx v22, v8, t6
-; RV64-NEXT:    slli s9, s0, 45
-; RV64-NEXT:    vand.vx v26, v8, a2
-; RV64-NEXT:    slli s10, s0, 46
-; RV64-NEXT:    vand.vx v28, v8, a1
-; RV64-NEXT:    slli s11, s0, 47
-; RV64-NEXT:    vand.vx v30, v8, a0
-; RV64-NEXT:    slli ra, s0, 48
-; RV64-NEXT:    slli s4, s0, 49
-; RV64-NEXT:    slli s3, s0, 50
-; RV64-NEXT:    slli s1, s0, 51
-; RV64-NEXT:    slli t6, s0, 52
-; RV64-NEXT:    slli t5, s0, 53
-; RV64-NEXT:    slli t3, s0, 54
-; RV64-NEXT:    slli t2, s0, 55
-; RV64-NEXT:    slli t1, s0, 56
-; RV64-NEXT:    slli t0, s0, 57
-; RV64-NEXT:    slli a4, s0, 58
-; RV64-NEXT:    slli a3, s0, 59
-; RV64-NEXT:    slli a2, s0, 60
-; RV64-NEXT:    slli a1, s0, 61
-; RV64-NEXT:    slli s0, s0, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v6, v8, 2
-; RV64-NEXT:    vand.vi v4, v8, 1
-; RV64-NEXT:    vand.vi v2, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v6, v8, v6
-; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v4
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v2
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v14
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v24
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v16
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v18
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v20
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v22
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v26
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v28
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v30
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 8
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 16
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 32
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 64
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 128
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 256
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 512
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 1024
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 2048
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 8192
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 16384
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 32768
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 65536
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 131072
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 262144
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv a5, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s6
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s7
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s8
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s9
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s10
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s11
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, ra
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s4
-; RV64-NEXT:    vmul.vv v20, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s3
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    vand.vx v12, v8, s1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s1, vlenb
-; RV64-NEXT:    slli s1, s1, 3
-; RV64-NEXT:    add s1, sp, s1
-; RV64-NEXT:    addi s1, s1, 112
-; RV64-NEXT:    vs2r.v v12, (s1) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs2r.v v12, (t6) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t5
-; RV64-NEXT:    vmul.vv v6, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    vmul.vv v22, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, t2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    vand.vx v14, v8, t1
-; RV64-NEXT:    vmul.vv v24, v8, v14
-; RV64-NEXT:    vand.vx v14, v8, t0
-; RV64-NEXT:    vmul.vv v14, v8, v14
-; RV64-NEXT:    vand.vx v16, v8, a4
-; RV64-NEXT:    vmul.vv v4, v8, v16
-; RV64-NEXT:    vand.vx v16, v8, a3
-; RV64-NEXT:    vmul.vv v2, v8, v16
-; RV64-NEXT:    vand.vx v16, v8, a2
-; RV64-NEXT:    vmul.vv v26, v8, v16
-; RV64-NEXT:    vand.vx v16, v8, a1
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    addi a1, sp, 112
-; RV64-NEXT:    vs2r.v v16, (a1) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s0
-; RV64-NEXT:    vmul.vv v18, v8, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v28, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v28
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v28, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v0, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v30, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    li a1, 56
-; RV64-NEXT:    vsll.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v0, v0, s2
-; RV64-NEXT:    li a0, 40
-; RV64-NEXT:    vsll.vx v0, v0, a0
-; RV64-NEXT:    vor.vv v8, v8, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v0, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v28, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v28, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v30
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 112
-; RV64-NEXT:    vl2r.v v30, (a2) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v30, v0, v30
-; RV64-NEXT:    vxor.vv v20, v30, v20
-; RV64-NEXT:    vxor.vv v10, v20, v10
-; RV64-NEXT:    vsrl.vi v20, v28, 8
-; RV64-NEXT:    vand.vx v20, v20, t4
-; RV64-NEXT:    vsrl.vi v30, v0, 24
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v30, v30, a2
-; RV64-NEXT:    vor.vv v20, v20, v30
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 3
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 112
-; RV64-NEXT:    vl2r.v v30, (a3) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v10, v10, v30
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 112
-; RV64-NEXT:    vl2r.v v30, (a3) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v10, v10, v30
-; RV64-NEXT:    vxor.vv v10, v10, v6
-; RV64-NEXT:    vxor.vv v10, v10, v22
-; RV64-NEXT:    vxor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v28, a2
-; RV64-NEXT:    vsll.vi v12, v12, 24
-; RV64-NEXT:    vxor.vv v22, v10, v24
-; RV64-NEXT:    vxor.vv v14, v22, v14
-; RV64-NEXT:    vand.vx v22, v10, t4
-; RV64-NEXT:    vsll.vi v22, v22, 8
-; RV64-NEXT:    vor.vv v12, v12, v22
-; RV64-NEXT:    vxor.vv v14, v14, v4
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vxor.vv v12, v14, v2
-; RV64-NEXT:    vxor.vv v12, v12, v26
-; RV64-NEXT:    vsrl.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v10, v10, s2
-; RV64-NEXT:    addi a0, sp, 112
-; RV64-NEXT:    vl2r.v v14, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v14
-; RV64-NEXT:    vxor.vv v12, v12, v18
-; RV64-NEXT:    vxor.vv v12, v12, v16
-; RV64-NEXT:    vsrl.vx v12, v12, a1
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vor.vv v10, v20, v10
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v10, v10, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v10, v10, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v10, v10, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 224
-; RV64-NEXT:    ret
-  %a = call <vscale x 2 x i64> @llvm.clmulr.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
-  ret <vscale x 2 x i64> %a
-}
-
-define <vscale x 4 x i64> @clmulr_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_nxv4i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s11, 1044480
-; RV32-NEXT:    lui t6, 524288
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    li ra, 2
-; RV32-NEXT:    li t4, 4
-; RV32-NEXT:    li t2, 8
-; RV32-NEXT:    li t5, 16
-; RV32-NEXT:    li t3, 32
-; RV32-NEXT:    li t1, 64
-; RV32-NEXT:    li t0, 128
-; RV32-NEXT:    li a7, 256
-; RV32-NEXT:    li a6, 512
-; RV32-NEXT:    li a3, 1024
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    lui a4, 2
-; RV32-NEXT:    lui a1, 4
-; RV32-NEXT:    lui a5, 8
-; RV32-NEXT:    lui s0, 16
-; RV32-NEXT:    lui s1, 32
-; RV32-NEXT:    lui s2, 64
-; RV32-NEXT:    lui s3, 128
-; RV32-NEXT:    lui s4, 256
-; RV32-NEXT:    lui s5, 512
-; RV32-NEXT:    lui s6, 1024
-; RV32-NEXT:    lui s7, 2048
-; RV32-NEXT:    lui s8, 4096
-; RV32-NEXT:    lui s9, 8192
-; RV32-NEXT:    lui s10, 16384
-; RV32-NEXT:    sw s11, 272(sp)
-; RV32-NEXT:    lui s11, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw t6, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw a0, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw ra, 252(sp)
-; RV32-NEXT:    lui ra, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw t4, 244(sp)
-; RV32-NEXT:    lui t4, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw t2, 236(sp)
-; RV32-NEXT:    lui t2, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw t5, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw t3, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw t1, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw t0, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw a7, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw a6, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a3, 180(sp)
-; RV32-NEXT:    li t1, 1024
-; RV32-NEXT:    slli a3, a0, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw a3, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a2, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw a4, 156(sp)
-; RV32-NEXT:    lui t3, 2
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw a1, 148(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw a5, 140(sp)
-; RV32-NEXT:    lui t5, 8
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw s0, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw s1, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw s2, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw s3, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s4, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s5, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s6, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s7, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s8, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s9, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s10, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s11, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw ra, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw t4, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw t2, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw t6, 12(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v28, a1
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vmv.v.x v4, a1
-; RV32-NEXT:    addi a1, sp, 272
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a1), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v0, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    vsrl.vi v20, v8, 24
-; RV32-NEXT:    vsrl.vx v12, v8, a6
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    vsrl.vx v16, v8, a5
-; RV32-NEXT:    vsll.vx v24, v8, a6
-; RV32-NEXT:    addi a2, s0, -256
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vor.vv v16, v16, v12
-; RV32-NEXT:    vand.vx v12, v8, a2
-; RV32-NEXT:    vsll.vx v12, v12, a5
-; RV32-NEXT:    vor.vv v12, v24, v12
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    lui a4, 4080
-; RV32-NEXT:    vand.vx v20, v20, a4
-; RV32-NEXT:    lui a7, 349525
-; RV32-NEXT:    addi a7, a7, 1365
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vor.vv v20, v24, v20
-; RV32-NEXT:    vsetvli t0, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a7
-; RV32-NEXT:    vsetvli a7, zero, e64, m4, ta, ma
-; RV32-NEXT:    vor.vv v16, v20, v16
-; RV32-NEXT:    vand.vx v20, v8, a4
-; RV32-NEXT:    vsll.vi v20, v20, 24
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v20, v8
-; RV32-NEXT:    addi a7, sp, 264
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    addi a7, sp, 256
-; RV32-NEXT:    vlse64.v v12, (a7), zero
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vand.vv v16, v16, v28
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v4
-; RV32-NEXT:    vand.vv v16, v16, v4
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    addi a7, sp, 248
-; RV32-NEXT:    vlse64.v v16, (a7), zero
-; RV32-NEXT:    vand.vv v28, v8, v20
-; RV32-NEXT:    addi a7, sp, 240
-; RV32-NEXT:    addi t0, sp, 232
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v4, v8, v12
-; RV32-NEXT:    vand.vv v0, v8, v16
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a7, sp, 224
-; RV32-NEXT:    addi t0, sp, 216
-; RV32-NEXT:    addi a1, sp, 208
-; RV32-NEXT:    addi a0, sp, 200
-; RV32-NEXT:    vlse64.v v12, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    vlse64.v v20, (a1), zero
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    addi a1, sp, 184
-; RV32-NEXT:    addi a7, sp, 176
-; RV32-NEXT:    addi t0, sp, 168
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    addi a1, sp, 152
-; RV32-NEXT:    addi a7, sp, 144
-; RV32-NEXT:    addi t0, sp, 136
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    addi a1, sp, 120
-; RV32-NEXT:    addi a7, sp, 112
-; RV32-NEXT:    addi t0, sp, 104
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    addi a1, sp, 88
-; RV32-NEXT:    addi a7, sp, 80
-; RV32-NEXT:    addi t0, sp, 72
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    addi a1, sp, 56
-; RV32-NEXT:    addi a7, sp, 48
-; RV32-NEXT:    addi t0, sp, 40
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    addi a7, sp, 16
-; RV32-NEXT:    addi t0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 8
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, a3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    lui a0, 4
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s7
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s8
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s9
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s10
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s11
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, ra
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v16, v8, v28
-; RV32-NEXT:    vmul.vv v20, v8, v4
-; RV32-NEXT:    vmul.vv v24, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v28, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v28, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vx v12, v8, a6
-; RV32-NEXT:    vsrl.vx v16, v8, a5
-; RV32-NEXT:    vsrl.vi v20, v8, 24
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vor.vv v12, v16, v12
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vand.vx v20, v20, a4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v16, v16, v20
-; RV32-NEXT:    vand.vx v20, v8, a4
-; RV32-NEXT:    vand.vv v24, v8, v24
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vsll.vi v20, v20, 24
-; RV32-NEXT:    vor.vv v20, v20, v24
-; RV32-NEXT:    vsll.vx v24, v8, a6
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vsll.vx v8, v8, a5
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vor.vv v12, v16, v12
-; RV32-NEXT:    vor.vv v8, v8, v20
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv4i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -240
-; RV64-NEXT:    sd ra, 232(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 224(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li t0, 40
-; RV64-NEXT:    lui a7, 16
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v20, v8, 24
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    li t2, 255
-; RV64-NEXT:    lui a3, 61681
-; RV64-NEXT:    lui a4, 209715
-; RV64-NEXT:    lui a5, 349525
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    li a0, 64
-; RV64-NEXT:    li s9, 1
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v16, v8, a6
-; RV64-NEXT:    vsrl.vx v28, v8, t0
-; RV64-NEXT:    addi t6, a7, -256
-; RV64-NEXT:    lui a7, 4080
-; RV64-NEXT:    vand.vx v24, v20, a7
-; RV64-NEXT:    slli t2, t2, 24
-; RV64-NEXT:    vand.vx v4, v8, a7
-; RV64-NEXT:    vsll.vx v20, v8, a6
-; RV64-NEXT:    addi a7, a3, -241
-; RV64-NEXT:    addi a6, a4, 819
-; RV64-NEXT:    addi a5, a5, 1365
-; RV64-NEXT:    slli a3, s9, 11
-; RV64-NEXT:    sd a3, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 31
-; RV64-NEXT:    sd a3, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 32
-; RV64-NEXT:    sd a3, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 33
-; RV64-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 34
-; RV64-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 35
-; RV64-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 36
-; RV64-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 37
-; RV64-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 38
-; RV64-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 39
-; RV64-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 40
-; RV64-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 41
-; RV64-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s6, s9, 42
-; RV64-NEXT:    slli s7, s9, 43
-; RV64-NEXT:    slli a3, a7, 32
-; RV64-NEXT:    add a7, a7, a3
-; RV64-NEXT:    slli a3, a6, 32
-; RV64-NEXT:    add a6, a6, a3
-; RV64-NEXT:    slli a3, a5, 32
-; RV64-NEXT:    add a5, a5, a3
-; RV64-NEXT:    slli s8, s9, 44
-; RV64-NEXT:    vand.vx v28, v28, t6
-; RV64-NEXT:    vand.vx v12, v12, t2
-; RV64-NEXT:    vsll.vi v4, v4, 24
-; RV64-NEXT:    vand.vx v0, v8, t2
-; RV64-NEXT:    vand.vx v8, v8, t6
-; RV64-NEXT:    vor.vv v16, v28, v16
-; RV64-NEXT:    vor.vv v12, v12, v24
-; RV64-NEXT:    vsll.vi v24, v0, 8
-; RV64-NEXT:    vsll.vx v8, v8, t0
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vor.vv v16, v4, v24
-; RV64-NEXT:    vor.vv v8, v20, v8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v12, v12, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v12, v12, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v12, v12, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    slli s10, s9, 45
-; RV64-NEXT:    vand.vx v16, v8, a1
-; RV64-NEXT:    slli s11, s9, 46
-; RV64-NEXT:    vand.vx v20, v8, a0
-; RV64-NEXT:    slli ra, s9, 47
-; RV64-NEXT:    slli s4, s9, 48
-; RV64-NEXT:    slli s3, s9, 49
-; RV64-NEXT:    slli s2, s9, 50
-; RV64-NEXT:    slli s1, s9, 51
-; RV64-NEXT:    slli s0, s9, 52
-; RV64-NEXT:    slli t5, s9, 53
-; RV64-NEXT:    slli t4, s9, 54
-; RV64-NEXT:    slli t3, s9, 55
-; RV64-NEXT:    slli t1, s9, 56
-; RV64-NEXT:    slli t0, s9, 57
-; RV64-NEXT:    slli a4, s9, 58
-; RV64-NEXT:    slli a3, s9, 59
-; RV64-NEXT:    slli a2, s9, 60
-; RV64-NEXT:    slli a1, s9, 61
-; RV64-NEXT:    slli s9, s9, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v24, v8, 2
-; RV64-NEXT:    vand.vi v28, v8, 1
-; RV64-NEXT:    vand.vi v4, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v24, v8, v24
-; RV64-NEXT:    vmul.vv v28, v8, v28
-; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v28, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v28, v8, v4
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v28, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v16
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v20
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 128
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 256
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 512
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 1024
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 1
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 6
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 2
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 4
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 8
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 16
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 32
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 64
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 128
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 256
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 512
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 1024
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 2048
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 8192
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 16384
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 32768
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 65536
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 7
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 131072
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 262144
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv a5, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s7
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s8
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s10
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s11
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, ra
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    slli s4, s4, 3
-; RV64-NEXT:    mv s5, s4
-; RV64-NEXT:    slli s4, s4, 2
-; RV64-NEXT:    add s4, s4, s5
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 128
-; RV64-NEXT:    vs4r.v v12, (s4) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s3, vlenb
-; RV64-NEXT:    slli s3, s3, 2
-; RV64-NEXT:    mv s4, s3
-; RV64-NEXT:    slli s3, s3, 2
-; RV64-NEXT:    add s3, s3, s4
-; RV64-NEXT:    add s3, sp, s3
-; RV64-NEXT:    addi s3, s3, 128
-; RV64-NEXT:    vs4r.v v12, (s3) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s2
-; RV64-NEXT:    vmul.vv v4, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, s1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s1, vlenb
-; RV64-NEXT:    slli s1, s1, 2
-; RV64-NEXT:    mv s2, s1
-; RV64-NEXT:    slli s1, s1, 2
-; RV64-NEXT:    add s2, s2, s1
-; RV64-NEXT:    slli s1, s1, 1
-; RV64-NEXT:    add s1, s1, s2
-; RV64-NEXT:    add s1, sp, s1
-; RV64-NEXT:    addi s1, s1, 128
-; RV64-NEXT:    vs4r.v v12, (s1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s0, vlenb
-; RV64-NEXT:    slli s0, s0, 2
-; RV64-NEXT:    mv s1, s0
-; RV64-NEXT:    slli s0, s0, 1
-; RV64-NEXT:    add s1, s1, s0
-; RV64-NEXT:    slli s0, s0, 2
-; RV64-NEXT:    add s0, s0, s1
-; RV64-NEXT:    add s0, sp, s0
-; RV64-NEXT:    addi s0, s0, 128
-; RV64-NEXT:    vs4r.v v12, (s0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t5, vlenb
-; RV64-NEXT:    slli t5, t5, 2
-; RV64-NEXT:    mv s0, t5
-; RV64-NEXT:    slli t5, t5, 3
-; RV64-NEXT:    add t5, t5, s0
-; RV64-NEXT:    add t5, sp, t5
-; RV64-NEXT:    addi t5, t5, 128
-; RV64-NEXT:    vs4r.v v12, (t5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t4, vlenb
-; RV64-NEXT:    slli t4, t4, 4
-; RV64-NEXT:    add t4, sp, t4
-; RV64-NEXT:    addi t4, t4, 128
-; RV64-NEXT:    vs4r.v v12, (t4) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t3, vlenb
-; RV64-NEXT:    slli t3, t3, 2
-; RV64-NEXT:    add t3, sp, t3
-; RV64-NEXT:    addi t3, t3, 128
-; RV64-NEXT:    vs4r.v v12, (t3) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t1
-; RV64-NEXT:    vmul.vv v20, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, t0
-; RV64-NEXT:    vmul.vv v16, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, a4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 5
-; RV64-NEXT:    add a4, sp, a4
-; RV64-NEXT:    addi a4, a4, 128
-; RV64-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    mv a4, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a4, a4, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vs4r.v v12, (a3) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vs4r.v v12, (a2) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s9
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v12
-; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v12
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v12, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    li a1, 56
-; RV64-NEXT:    vsll.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v12, v12, t6
-; RV64-NEXT:    li a0, 40
-; RV64-NEXT:    vsll.vx v12, v12, a0
-; RV64-NEXT:    vor.vv v12, v8, v12
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v8, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v0, v8
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 7
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 6
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v24, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v0, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v28, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v28
-; RV64-NEXT:    vxor.vv v24, v24, v4
-; RV64-NEXT:    vsrl.vi v4, v8, 8
-; RV64-NEXT:    vand.vx v4, v4, t2
-; RV64-NEXT:    vsrl.vi v0, v0, 24
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v0, v0, a2
-; RV64-NEXT:    vor.vv v4, v4, v0
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    mv a4, a3
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    add a4, a4, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl4r.v v0, (a3) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    mv a4, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a4, a4, a3
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl4r.v v0, (a3) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    mv a4, a3
-; RV64-NEXT:    slli a3, a3, 3
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl4r.v v0, (a3) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 4
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl4r.v v28, (a3) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v28
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl4r.v v28, (a3) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v28
-; RV64-NEXT:    vxor.vv v20, v24, v20
-; RV64-NEXT:    vxor.vv v16, v20, v16
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vand.vx v20, v24, t2
-; RV64-NEXT:    vsll.vi v20, v20, 8
-; RV64-NEXT:    vor.vv v8, v8, v20
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v20, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v20
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v12, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v16, v12
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl4r.v v16, (a2) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v16
-; RV64-NEXT:    vsrl.vx v16, v24, a0
-; RV64-NEXT:    vand.vx v16, v16, t6
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a2, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v20
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v20
-; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v12, v12, v20
-; RV64-NEXT:    vsrl.vx v12, v12, a1
-; RV64-NEXT:    vor.vv v12, v16, v12
-; RV64-NEXT:    vor.vv v12, v4, v12
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v12, v12, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v12, v12, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v12, v12, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 232(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 224(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 240
-; RV64-NEXT:    ret
-  %a = call <vscale x 4 x i64> @llvm.clmulr.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
-  ret <vscale x 4 x i64> %a
-}
-
-define <vscale x 8 x i64> @clmulr_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_nxv8i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s11, 1044480
-; RV32-NEXT:    lui s0, 524288
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    li ra, 2
-; RV32-NEXT:    li t4, 4
-; RV32-NEXT:    li t2, 8
-; RV32-NEXT:    li t6, 16
-; RV32-NEXT:    li t5, 32
-; RV32-NEXT:    li t3, 64
-; RV32-NEXT:    li t1, 128
-; RV32-NEXT:    li t0, 256
-; RV32-NEXT:    li a7, 512
-; RV32-NEXT:    li a6, 1024
-; RV32-NEXT:    lui a4, 1
-; RV32-NEXT:    lui a3, 2
-; RV32-NEXT:    lui a2, 4
-; RV32-NEXT:    lui a5, 8
-; RV32-NEXT:    lui s1, 16
-; RV32-NEXT:    lui a1, 32
-; RV32-NEXT:    lui s2, 64
-; RV32-NEXT:    lui s3, 128
-; RV32-NEXT:    lui s4, 256
-; RV32-NEXT:    lui s5, 512
-; RV32-NEXT:    lui s6, 1024
-; RV32-NEXT:    lui s7, 2048
-; RV32-NEXT:    lui s8, 4096
-; RV32-NEXT:    lui s9, 8192
-; RV32-NEXT:    lui s10, 16384
-; RV32-NEXT:    sw s11, 272(sp)
-; RV32-NEXT:    lui s11, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw s0, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw a0, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw ra, 252(sp)
-; RV32-NEXT:    lui ra, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw t4, 244(sp)
-; RV32-NEXT:    lui t4, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw t2, 236(sp)
-; RV32-NEXT:    lui t2, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw t6, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw t5, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw t3, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw t1, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw t0, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw a7, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a6, 180(sp)
-; RV32-NEXT:    li t1, 1024
-; RV32-NEXT:    slli t6, a0, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw t6, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a4, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw a3, 156(sp)
-; RV32-NEXT:    lui t3, 2
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw a2, 148(sp)
-; RV32-NEXT:    lui t5, 4
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw a5, 140(sp)
-; RV32-NEXT:    lui a4, 8
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw s1, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw a1, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw s2, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw s3, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s4, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s5, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s6, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s7, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s8, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s9, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s10, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s11, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw ra, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw t4, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw t2, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw s0, 12(sp)
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vx v16, v8, a6
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    vsrl.vx v24, v8, a5
-; RV32-NEXT:    vsll.vx v0, v8, a6
-; RV32-NEXT:    addi a2, s1, -256
-; RV32-NEXT:    vand.vx v24, v24, a2
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v24, v8, a2
-; RV32-NEXT:    vsll.vx v24, v24, a5
-; RV32-NEXT:    vor.vv v16, v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a3, sp, 272
-; RV32-NEXT:    vlse64.v v24, (a3), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui a3, 4080
-; RV32-NEXT:    vsrl.vi v0, v8, 24
-; RV32-NEXT:    vand.vx v16, v0, a3
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    vmv8r.v v0, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v8
-; RV32-NEXT:    vor.vv v24, v24, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vor.vv v16, v24, v8
-; RV32-NEXT:    vand.vx v24, v0, a3
-; RV32-NEXT:    vsll.vi v24, v24, 24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v0, v0, v8
-; RV32-NEXT:    vsll.vi v0, v0, 8
-; RV32-NEXT:    vor.vv v24, v24, v0
-; RV32-NEXT:    lui a7, 61681
-; RV32-NEXT:    addi a7, a7, -241
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsetvli t0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a7
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui a7, 209715
-; RV32-NEXT:    addi a7, a7, 819
-; RV32-NEXT:    vsetvli t0, zero, e64, m8, ta, ma
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsetvli t0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a7
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vsetvli a7, zero, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v16, v16, v8
-; RV32-NEXT:    lui a7, 349525
-; RV32-NEXT:    addi a7, a7, 1365
-; RV32-NEXT:    vsetvli t0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a7
-; RV32-NEXT:    vsetvli a7, zero, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v8, v16, 1
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vmv8r.v v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 9
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a7, sp, 264
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v16, v16, v16
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a7, sp, 256
-; RV32-NEXT:    addi t0, sp, 248
-; RV32-NEXT:    addi a1, sp, 240
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v16, (a7), zero
-; RV32-NEXT:    csrr a7, vlenb
-; RV32-NEXT:    slli a7, a7, 4
-; RV32-NEXT:    mv s0, a7
-; RV32-NEXT:    slli a7, a7, 1
-; RV32-NEXT:    add s0, s0, a7
-; RV32-NEXT:    slli a7, a7, 1
-; RV32-NEXT:    add s0, s0, a7
-; RV32-NEXT:    slli a7, a7, 1
-; RV32-NEXT:    add s0, s0, a7
-; RV32-NEXT:    slli a7, a7, 1
-; RV32-NEXT:    add a7, a7, s0
-; RV32-NEXT:    add a7, sp, a7
-; RV32-NEXT:    addi a7, a7, 288
-; RV32-NEXT:    vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (t0), zero
-; RV32-NEXT:    vlse64.v v0, (a1), zero
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 224
-; RV32-NEXT:    addi a1, sp, 216
-; RV32-NEXT:    addi a7, sp, 208
-; RV32-NEXT:    addi t0, sp, 200
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    addi a1, sp, 184
-; RV32-NEXT:    addi a7, sp, 176
-; RV32-NEXT:    addi t0, sp, 168
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    addi a1, sp, 152
-; RV32-NEXT:    addi a7, sp, 144
-; RV32-NEXT:    addi t0, sp, 136
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    addi a1, sp, 120
-; RV32-NEXT:    addi a7, sp, 112
-; RV32-NEXT:    addi t0, sp, 104
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    addi a1, sp, 88
-; RV32-NEXT:    addi a7, sp, 80
-; RV32-NEXT:    addi t0, sp, 72
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    addi a1, sp, 56
-; RV32-NEXT:    addi a7, sp, 48
-; RV32-NEXT:    addi t0, sp, 40
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s0, s0, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    addi a7, sp, 16
-; RV32-NEXT:    addi t0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv s0, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, s0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vlse64.v v0, (a7), zero
-; RV32-NEXT:    vlse64.v v16, (t0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vi v16, v8, 2
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vi v16, v8, 1
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vi v16, v8, 4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vi v16, v8, 8
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t1
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t6
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t3
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t5
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, a4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s1
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    lui a0, 32
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s2
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s3
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s5
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s6
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s7
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s8
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s9
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s10
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, s11
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, ra
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t4
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vand.vx v16, v8, t2
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v24, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v16, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vx v16, v8, a5
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vsrl.vx v24, v8, a6
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vand.vx v24, v24, a3
-; RV32-NEXT:    vsrl.vi v0, v8, 8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v0, v0, v16
-; RV32-NEXT:    vor.vv v24, v0, v24
-; RV32-NEXT:    vand.vv v0, v8, v16
-; RV32-NEXT:    vsll.vi v0, v0, 8
-; RV32-NEXT:    vand.vx v16, v8, a3
-; RV32-NEXT:    vsll.vi v16, v16, 24
-; RV32-NEXT:    vor.vv v16, v16, v0
-; RV32-NEXT:    vsll.vx v0, v8, a6
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vsll.vx v8, v8, a5
-; RV32-NEXT:    vor.vv v8, v0, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vor.vv v24, v24, v0
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 9
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_nxv8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -240
-; RV64-NEXT:    sd ra, 232(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 224(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li a1, 56
-; RV64-NEXT:    li a2, 40
-; RV64-NEXT:    lui a3, 16
-; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v8, 24
-; RV64-NEXT:    vsrl.vx v16, v8, a1
-; RV64-NEXT:    li a5, 56
-; RV64-NEXT:    vsrl.vx v0, v8, a2
-; RV64-NEXT:    li s5, 40
-; RV64-NEXT:    addi s4, a3, -256
-; RV64-NEXT:    vand.vx v0, v0, s4
-; RV64-NEXT:    vor.vv v16, v0, v16
-; RV64-NEXT:    vsrl.vi v0, v8, 8
-; RV64-NEXT:    li a4, 255
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    lui a3, 349525
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    lui a6, 4080
-; RV64-NEXT:    vand.vx v24, v24, a6
-; RV64-NEXT:    slli a4, a4, 24
-; RV64-NEXT:    vand.vx v0, v0, a4
-; RV64-NEXT:    vor.vv v24, v0, v24
-; RV64-NEXT:    vand.vx v0, v8, a6
-; RV64-NEXT:    vsll.vi v0, v0, 24
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    vand.vx v24, v8, a4
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vor.vv v24, v0, v24
-; RV64-NEXT:    vsll.vx v0, v8, a5
-; RV64-NEXT:    addi a7, a1, -241
-; RV64-NEXT:    addi a6, a2, 819
-; RV64-NEXT:    addi a5, a3, 1365
-; RV64-NEXT:    slli a1, a0, 11
-; RV64-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 31
-; RV64-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 33
-; RV64-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 34
-; RV64-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 35
-; RV64-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 36
-; RV64-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 37
-; RV64-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 38
-; RV64-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 39
-; RV64-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 40
-; RV64-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, a0, 41
-; RV64-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s6, a0, 42
-; RV64-NEXT:    slli s7, a0, 43
-; RV64-NEXT:    slli s8, a0, 44
-; RV64-NEXT:    slli s9, a0, 45
-; RV64-NEXT:    slli s10, a0, 46
-; RV64-NEXT:    slli a1, a7, 32
-; RV64-NEXT:    add a7, a7, a1
-; RV64-NEXT:    slli a1, a6, 32
-; RV64-NEXT:    add a6, a6, a1
-; RV64-NEXT:    slli a1, a5, 32
-; RV64-NEXT:    add a5, a5, a1
-; RV64-NEXT:    slli s11, a0, 47
-; RV64-NEXT:    slli ra, a0, 48
-; RV64-NEXT:    slli s3, a0, 49
-; RV64-NEXT:    slli s2, a0, 50
-; RV64-NEXT:    slli s1, a0, 51
-; RV64-NEXT:    slli s0, a0, 52
-; RV64-NEXT:    slli t6, a0, 53
-; RV64-NEXT:    slli t5, a0, 54
-; RV64-NEXT:    slli t4, a0, 55
-; RV64-NEXT:    slli t3, a0, 56
-; RV64-NEXT:    slli t2, a0, 57
-; RV64-NEXT:    slli t1, a0, 58
-; RV64-NEXT:    slli t0, a0, 59
-; RV64-NEXT:    slli a3, a0, 60
-; RV64-NEXT:    slli a2, a0, 61
-; RV64-NEXT:    slli a1, a0, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vsll.vx v8, v8, s5
-; RV64-NEXT:    vor.vv v8, v0, v8
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v16, v16, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v16, v16, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v16, v16, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vand.vi v16, v8, 2
-; RV64-NEXT:    vand.vi v24, v8, 1
-; RV64-NEXT:    vand.vi v0, v8, 4
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v24
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vand.vi v16, v8, 8
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 16
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 32
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 64
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 128
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 256
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 512
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    li s5, 1024
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 1
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 2
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 4
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 8
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 7
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 16
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 32
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 64
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 128
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 256
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 512
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 1024
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 2048
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 8192
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 16384
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 32768
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 65536
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 131072
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui s5, 262144
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 5
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 8
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    mv a1, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, a1
-; RV64-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s6
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s7
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 5
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s8
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s9
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s10
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s11
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, ra
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs8r.v v16, (s5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s3
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s3, vlenb
-; RV64-NEXT:    slli s3, s3, 3
-; RV64-NEXT:    mv s5, s3
-; RV64-NEXT:    slli s3, s3, 3
-; RV64-NEXT:    add s3, s3, s5
-; RV64-NEXT:    add s3, sp, s3
-; RV64-NEXT:    addi s3, s3, 128
-; RV64-NEXT:    vs8r.v v16, (s3) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s2
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s2, vlenb
-; RV64-NEXT:    slli s2, s2, 4
-; RV64-NEXT:    mv s3, s2
-; RV64-NEXT:    slli s2, s2, 1
-; RV64-NEXT:    add s2, s2, s3
-; RV64-NEXT:    add s2, sp, s2
-; RV64-NEXT:    addi s2, s2, 128
-; RV64-NEXT:    vs8r.v v16, (s2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s1
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s1, vlenb
-; RV64-NEXT:    slli s1, s1, 7
-; RV64-NEXT:    add s1, sp, s1
-; RV64-NEXT:    addi s1, s1, 128
-; RV64-NEXT:    vs8r.v v16, (s1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, s0
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr s0, vlenb
-; RV64-NEXT:    slli s0, s0, 4
-; RV64-NEXT:    mv s1, s0
-; RV64-NEXT:    slli s0, s0, 1
-; RV64-NEXT:    add s1, s1, s0
-; RV64-NEXT:    slli s0, s0, 1
-; RV64-NEXT:    add s0, s0, s1
-; RV64-NEXT:    add s0, sp, s0
-; RV64-NEXT:    addi s0, s0, 128
-; RV64-NEXT:    vs8r.v v16, (s0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t6
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 5
-; RV64-NEXT:    mv s0, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s0
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 128
-; RV64-NEXT:    vs8r.v v16, (t6) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t5
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t5, vlenb
-; RV64-NEXT:    slli t5, t5, 6
-; RV64-NEXT:    add t5, sp, t5
-; RV64-NEXT:    addi t5, t5, 128
-; RV64-NEXT:    vs8r.v v16, (t5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t4
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t4, vlenb
-; RV64-NEXT:    slli t4, t4, 3
-; RV64-NEXT:    mv t5, t4
-; RV64-NEXT:    slli t4, t4, 2
-; RV64-NEXT:    add t4, t4, t5
-; RV64-NEXT:    add t4, sp, t4
-; RV64-NEXT:    addi t4, t4, 128
-; RV64-NEXT:    vs8r.v v16, (t4) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t3
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t3, vlenb
-; RV64-NEXT:    slli t3, t3, 3
-; RV64-NEXT:    mv t4, t3
-; RV64-NEXT:    slli t3, t3, 1
-; RV64-NEXT:    add t3, t3, t4
-; RV64-NEXT:    add t3, sp, t3
-; RV64-NEXT:    addi t3, t3, 128
-; RV64-NEXT:    vs8r.v v16, (t3) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t2
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t2, vlenb
-; RV64-NEXT:    slli t2, t2, 3
-; RV64-NEXT:    add t2, sp, t2
-; RV64-NEXT:    addi t2, t2, 128
-; RV64-NEXT:    vs8r.v v16, (t2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t1
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t1, vlenb
-; RV64-NEXT:    slli t1, t1, 3
-; RV64-NEXT:    mv t2, t1
-; RV64-NEXT:    slli t1, t1, 1
-; RV64-NEXT:    add t2, t2, t1
-; RV64-NEXT:    slli t1, t1, 2
-; RV64-NEXT:    add t1, t1, t2
-; RV64-NEXT:    add t1, sp, t1
-; RV64-NEXT:    addi t1, t1, 128
-; RV64-NEXT:    vs8r.v v16, (t1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t0
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr t0, vlenb
-; RV64-NEXT:    slli t0, t0, 4
-; RV64-NEXT:    mv t1, t0
-; RV64-NEXT:    slli t0, t0, 2
-; RV64-NEXT:    add t0, t0, t1
-; RV64-NEXT:    add t0, sp, t0
-; RV64-NEXT:    addi t0, t0, 128
-; RV64-NEXT:    vs8r.v v16, (t0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a3
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 3
-; RV64-NEXT:    mv t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add t0, t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, t0
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a2
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a1
-; RV64-NEXT:    vmul.vv v16, v8, v16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 128
-; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v16
-; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v24, v8
-; RV64-NEXT:    vxor.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 7
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v16, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsll.vx v8, v8, a0
-; RV64-NEXT:    vand.vx v16, v16, s4
-; RV64-NEXT:    li a1, 40
-; RV64-NEXT:    vsll.vx v16, v16, a1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v24, v8
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 8
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 6
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v16, v8, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v16, v8
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v8, v24
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    vsrl.vi v0, v16, 8
-; RV64-NEXT:    vand.vx v0, v0, a4
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vor.vv v8, v0, v8
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 5
-; RV64-NEXT:    mv t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add t0, t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add t0, t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, t0
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 7
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v24, v8
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 4
-; RV64-NEXT:    mv t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add t0, t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, t0
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 5
-; RV64-NEXT:    mv t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, t0
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 6
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 3
-; RV64-NEXT:    mv t0, a3
-; RV64-NEXT:    slli a3, a3, 2
-; RV64-NEXT:    add a3, a3, t0
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v8, v8, v24
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 3
-; RV64-NEXT:    mv t0, a3
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    add a3, a3, t0
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v8, v24
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 3
-; RV64-NEXT:    add a3, sp, a3
-; RV64-NEXT:    addi a3, a3, 128
-; RV64-NEXT:    vl8r.v v0, (a3) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    vand.vx v16, v16, a2
-; RV64-NEXT:    vsll.vi v16, v16, 24
-; RV64-NEXT:    vand.vx v0, v8, a4
-; RV64-NEXT:    vsll.vi v0, v0, 8
-; RV64-NEXT:    vor.vv v16, v16, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vor.vv v16, v0, v16
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    mv a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a3, a3, a2
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    addi a2, sp, 128
-; RV64-NEXT:    vl8r.v v0, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vxor.vv v24, v24, v0
-; RV64-NEXT:    vsrl.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vsrl.vx v24, v24, a0
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v16, v16, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v16, v16, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v16, v16, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 232(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 224(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 240
-; RV64-NEXT:    ret
-  %a = call <vscale x 8 x i64> @llvm.clmulr.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
index 1c00086064133..56379e0b55e10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
@@ -4627,14740 +4627,3 @@ define <8 x i64> @clmul_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
   %a = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %x, <8 x i64> %y)
   ret <8 x i64> %a
 }
-
-define <1 x i32> @clmulr_v1i32(<1 x i32> %x, <1 x i32> %y) nounwind {
-; CHECK-LABEL: clmulr_v1i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    lui a4, 16
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vsll.vi v11, v8, 24
-; CHECK-NEXT:    lui a0, 61681
-; CHECK-NEXT:    lui a1, 209715
-; CHECK-NEXT:    lui a5, 349525
-; CHECK-NEXT:    li a6, 16
-; CHECK-NEXT:    addi a3, a4, -256
-; CHECK-NEXT:    addi a2, a0, -241
-; CHECK-NEXT:    addi a1, a1, 819
-; CHECK-NEXT:    addi a0, a5, 1365
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v11, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vand.vx v9, v8, a6
-; CHECK-NEXT:    li a5, 32
-; CHECK-NEXT:    vand.vx v10, v8, a5
-; CHECK-NEXT:    li a5, 64
-; CHECK-NEXT:    vand.vx v11, v8, a5
-; CHECK-NEXT:    li a5, 128
-; CHECK-NEXT:    vand.vx v12, v8, a5
-; CHECK-NEXT:    li a5, 256
-; CHECK-NEXT:    vand.vx v13, v8, a5
-; CHECK-NEXT:    li a5, 512
-; CHECK-NEXT:    vand.vx v14, v8, a5
-; CHECK-NEXT:    li a5, 1024
-; CHECK-NEXT:    vand.vx v15, v8, a5
-; CHECK-NEXT:    li a5, 1
-; CHECK-NEXT:    slli a5, a5, 11
-; CHECK-NEXT:    vand.vx v16, v8, a5
-; CHECK-NEXT:    lui a5, 1
-; CHECK-NEXT:    vand.vx v17, v8, a5
-; CHECK-NEXT:    lui a5, 2
-; CHECK-NEXT:    vand.vx v18, v8, a5
-; CHECK-NEXT:    lui a5, 4
-; CHECK-NEXT:    vand.vx v19, v8, a5
-; CHECK-NEXT:    lui a5, 8
-; CHECK-NEXT:    vand.vx v20, v8, a5
-; CHECK-NEXT:    lui a5, 32
-; CHECK-NEXT:    vand.vx v21, v8, a4
-; CHECK-NEXT:    lui a4, 64
-; CHECK-NEXT:    vand.vx v22, v8, a5
-; CHECK-NEXT:    lui a5, 128
-; CHECK-NEXT:    vand.vx v23, v8, a4
-; CHECK-NEXT:    lui a4, 256
-; CHECK-NEXT:    vand.vx v24, v8, a5
-; CHECK-NEXT:    lui a5, 512
-; CHECK-NEXT:    vand.vx v25, v8, a4
-; CHECK-NEXT:    lui a4, 1024
-; CHECK-NEXT:    vand.vx v26, v8, a5
-; CHECK-NEXT:    lui a5, 2048
-; CHECK-NEXT:    vand.vx v27, v8, a4
-; CHECK-NEXT:    lui a4, 4096
-; CHECK-NEXT:    vand.vx v28, v8, a5
-; CHECK-NEXT:    lui a5, 8192
-; CHECK-NEXT:    vand.vx v29, v8, a4
-; CHECK-NEXT:    lui a4, 16384
-; CHECK-NEXT:    vand.vx v30, v8, a5
-; CHECK-NEXT:    lui a5, 32768
-; CHECK-NEXT:    vand.vx v31, v8, a4
-; CHECK-NEXT:    lui a4, 65536
-; CHECK-NEXT:    vand.vx v7, v8, a5
-; CHECK-NEXT:    lui a5, 131072
-; CHECK-NEXT:    vand.vx v6, v8, a4
-; CHECK-NEXT:    lui a4, 262144
-; CHECK-NEXT:    vand.vx v5, v8, a5
-; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    vand.vi v4, v8, 2
-; CHECK-NEXT:    vand.vi v3, v8, 1
-; CHECK-NEXT:    vand.vi v2, v8, 4
-; CHECK-NEXT:    vand.vi v1, v8, 8
-; CHECK-NEXT:    vand.vx v0, v8, a4
-; CHECK-NEXT:    vmul.vv v4, v8, v4
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs1r.v v4, (a4) # vscale x 8-byte Folded Spill
-; CHECK-NEXT:    vmul.vv v3, v8, v3
-; CHECK-NEXT:    vmul.vv v2, v8, v2
-; CHECK-NEXT:    vmul.vv v1, v8, v1
-; CHECK-NEXT:    vmul.vv v9, v8, v9
-; CHECK-NEXT:    vmul.vv v10, v8, v10
-; CHECK-NEXT:    vmul.vv v11, v8, v11
-; CHECK-NEXT:    vmul.vv v12, v8, v12
-; CHECK-NEXT:    vmul.vv v13, v8, v13
-; CHECK-NEXT:    vmul.vv v14, v8, v14
-; CHECK-NEXT:    vmul.vv v15, v8, v15
-; CHECK-NEXT:    vmul.vv v16, v8, v16
-; CHECK-NEXT:    vmul.vv v17, v8, v17
-; CHECK-NEXT:    vmul.vv v18, v8, v18
-; CHECK-NEXT:    vmul.vv v19, v8, v19
-; CHECK-NEXT:    vmul.vv v20, v8, v20
-; CHECK-NEXT:    vmul.vv v21, v8, v21
-; CHECK-NEXT:    vmul.vv v22, v8, v22
-; CHECK-NEXT:    vmul.vv v23, v8, v23
-; CHECK-NEXT:    vmul.vv v24, v8, v24
-; CHECK-NEXT:    vmul.vv v25, v8, v25
-; CHECK-NEXT:    vmul.vv v26, v8, v26
-; CHECK-NEXT:    vmul.vv v27, v8, v27
-; CHECK-NEXT:    vmul.vv v28, v8, v28
-; CHECK-NEXT:    vmul.vv v29, v8, v29
-; CHECK-NEXT:    vmul.vv v30, v8, v30
-; CHECK-NEXT:    vmul.vv v31, v8, v31
-; CHECK-NEXT:    vmul.vv v7, v8, v7
-; CHECK-NEXT:    vmul.vv v6, v8, v6
-; CHECK-NEXT:    vmul.vv v5, v8, v5
-; CHECK-NEXT:    vmul.vv v0, v8, v0
-; CHECK-NEXT:    vand.vx v4, v8, a5
-; CHECK-NEXT:    vmul.vv v8, v8, v4
-; CHECK-NEXT:    vl1r.v v4, (a4) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vxor.vv v4, v3, v4
-; CHECK-NEXT:    vxor.vv v4, v4, v2
-; CHECK-NEXT:    vxor.vv v4, v4, v1
-; CHECK-NEXT:    vxor.vv v9, v4, v9
-; CHECK-NEXT:    vxor.vv v9, v9, v10
-; CHECK-NEXT:    vxor.vv v9, v9, v11
-; CHECK-NEXT:    vxor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v9, v9, v13
-; CHECK-NEXT:    vxor.vv v9, v9, v14
-; CHECK-NEXT:    vxor.vv v9, v9, v15
-; CHECK-NEXT:    vxor.vv v9, v9, v16
-; CHECK-NEXT:    vxor.vv v9, v9, v17
-; CHECK-NEXT:    vxor.vv v9, v9, v18
-; CHECK-NEXT:    vxor.vv v9, v9, v19
-; CHECK-NEXT:    vxor.vv v9, v9, v20
-; CHECK-NEXT:    vxor.vv v9, v9, v21
-; CHECK-NEXT:    vxor.vv v9, v9, v22
-; CHECK-NEXT:    vxor.vv v9, v9, v23
-; CHECK-NEXT:    vxor.vv v9, v9, v24
-; CHECK-NEXT:    vxor.vv v9, v9, v25
-; CHECK-NEXT:    vxor.vv v9, v9, v26
-; CHECK-NEXT:    vxor.vv v9, v9, v27
-; CHECK-NEXT:    vxor.vv v9, v9, v28
-; CHECK-NEXT:    vxor.vv v9, v9, v29
-; CHECK-NEXT:    vxor.vv v9, v9, v30
-; CHECK-NEXT:    vxor.vv v9, v9, v31
-; CHECK-NEXT:    vxor.vv v9, v9, v7
-; CHECK-NEXT:    vxor.vv v9, v9, v6
-; CHECK-NEXT:    vxor.vv v9, v9, v5
-; CHECK-NEXT:    vxor.vv v9, v9, v0
-; CHECK-NEXT:    vxor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v10, v8, 24
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v10, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
-  %a = call <1 x i32> @llvm.clmulr.v1i32(<1 x i32> %x, <1 x i32> %y)
-  ret <1 x i32> %a
-}
-
-define <2 x i32> @clmulr_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
-; CHECK-LABEL: clmulr_v2i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    lui a4, 16
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vsll.vi v11, v8, 24
-; CHECK-NEXT:    lui a0, 61681
-; CHECK-NEXT:    lui a1, 209715
-; CHECK-NEXT:    lui a5, 349525
-; CHECK-NEXT:    li a6, 16
-; CHECK-NEXT:    addi a3, a4, -256
-; CHECK-NEXT:    addi a2, a0, -241
-; CHECK-NEXT:    addi a1, a1, 819
-; CHECK-NEXT:    addi a0, a5, 1365
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v11, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vand.vx v9, v8, a6
-; CHECK-NEXT:    li a5, 32
-; CHECK-NEXT:    vand.vx v10, v8, a5
-; CHECK-NEXT:    li a5, 64
-; CHECK-NEXT:    vand.vx v11, v8, a5
-; CHECK-NEXT:    li a5, 128
-; CHECK-NEXT:    vand.vx v12, v8, a5
-; CHECK-NEXT:    li a5, 256
-; CHECK-NEXT:    vand.vx v13, v8, a5
-; CHECK-NEXT:    li a5, 512
-; CHECK-NEXT:    vand.vx v14, v8, a5
-; CHECK-NEXT:    li a5, 1024
-; CHECK-NEXT:    vand.vx v15, v8, a5
-; CHECK-NEXT:    li a5, 1
-; CHECK-NEXT:    slli a5, a5, 11
-; CHECK-NEXT:    vand.vx v16, v8, a5
-; CHECK-NEXT:    lui a5, 1
-; CHECK-NEXT:    vand.vx v17, v8, a5
-; CHECK-NEXT:    lui a5, 2
-; CHECK-NEXT:    vand.vx v18, v8, a5
-; CHECK-NEXT:    lui a5, 4
-; CHECK-NEXT:    vand.vx v19, v8, a5
-; CHECK-NEXT:    lui a5, 8
-; CHECK-NEXT:    vand.vx v20, v8, a5
-; CHECK-NEXT:    lui a5, 32
-; CHECK-NEXT:    vand.vx v21, v8, a4
-; CHECK-NEXT:    lui a4, 64
-; CHECK-NEXT:    vand.vx v22, v8, a5
-; CHECK-NEXT:    lui a5, 128
-; CHECK-NEXT:    vand.vx v23, v8, a4
-; CHECK-NEXT:    lui a4, 256
-; CHECK-NEXT:    vand.vx v24, v8, a5
-; CHECK-NEXT:    lui a5, 512
-; CHECK-NEXT:    vand.vx v25, v8, a4
-; CHECK-NEXT:    lui a4, 1024
-; CHECK-NEXT:    vand.vx v26, v8, a5
-; CHECK-NEXT:    lui a5, 2048
-; CHECK-NEXT:    vand.vx v27, v8, a4
-; CHECK-NEXT:    lui a4, 4096
-; CHECK-NEXT:    vand.vx v28, v8, a5
-; CHECK-NEXT:    lui a5, 8192
-; CHECK-NEXT:    vand.vx v29, v8, a4
-; CHECK-NEXT:    lui a4, 16384
-; CHECK-NEXT:    vand.vx v30, v8, a5
-; CHECK-NEXT:    lui a5, 32768
-; CHECK-NEXT:    vand.vx v31, v8, a4
-; CHECK-NEXT:    lui a4, 65536
-; CHECK-NEXT:    vand.vx v7, v8, a5
-; CHECK-NEXT:    lui a5, 131072
-; CHECK-NEXT:    vand.vx v6, v8, a4
-; CHECK-NEXT:    lui a4, 262144
-; CHECK-NEXT:    vand.vx v5, v8, a5
-; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    vand.vi v4, v8, 2
-; CHECK-NEXT:    vand.vi v3, v8, 1
-; CHECK-NEXT:    vand.vi v2, v8, 4
-; CHECK-NEXT:    vand.vi v1, v8, 8
-; CHECK-NEXT:    vand.vx v0, v8, a4
-; CHECK-NEXT:    vmul.vv v4, v8, v4
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs1r.v v4, (a4) # vscale x 8-byte Folded Spill
-; CHECK-NEXT:    vmul.vv v3, v8, v3
-; CHECK-NEXT:    vmul.vv v2, v8, v2
-; CHECK-NEXT:    vmul.vv v1, v8, v1
-; CHECK-NEXT:    vmul.vv v9, v8, v9
-; CHECK-NEXT:    vmul.vv v10, v8, v10
-; CHECK-NEXT:    vmul.vv v11, v8, v11
-; CHECK-NEXT:    vmul.vv v12, v8, v12
-; CHECK-NEXT:    vmul.vv v13, v8, v13
-; CHECK-NEXT:    vmul.vv v14, v8, v14
-; CHECK-NEXT:    vmul.vv v15, v8, v15
-; CHECK-NEXT:    vmul.vv v16, v8, v16
-; CHECK-NEXT:    vmul.vv v17, v8, v17
-; CHECK-NEXT:    vmul.vv v18, v8, v18
-; CHECK-NEXT:    vmul.vv v19, v8, v19
-; CHECK-NEXT:    vmul.vv v20, v8, v20
-; CHECK-NEXT:    vmul.vv v21, v8, v21
-; CHECK-NEXT:    vmul.vv v22, v8, v22
-; CHECK-NEXT:    vmul.vv v23, v8, v23
-; CHECK-NEXT:    vmul.vv v24, v8, v24
-; CHECK-NEXT:    vmul.vv v25, v8, v25
-; CHECK-NEXT:    vmul.vv v26, v8, v26
-; CHECK-NEXT:    vmul.vv v27, v8, v27
-; CHECK-NEXT:    vmul.vv v28, v8, v28
-; CHECK-NEXT:    vmul.vv v29, v8, v29
-; CHECK-NEXT:    vmul.vv v30, v8, v30
-; CHECK-NEXT:    vmul.vv v31, v8, v31
-; CHECK-NEXT:    vmul.vv v7, v8, v7
-; CHECK-NEXT:    vmul.vv v6, v8, v6
-; CHECK-NEXT:    vmul.vv v5, v8, v5
-; CHECK-NEXT:    vmul.vv v0, v8, v0
-; CHECK-NEXT:    vand.vx v4, v8, a5
-; CHECK-NEXT:    vmul.vv v8, v8, v4
-; CHECK-NEXT:    vl1r.v v4, (a4) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vxor.vv v4, v3, v4
-; CHECK-NEXT:    vxor.vv v4, v4, v2
-; CHECK-NEXT:    vxor.vv v4, v4, v1
-; CHECK-NEXT:    vxor.vv v9, v4, v9
-; CHECK-NEXT:    vxor.vv v9, v9, v10
-; CHECK-NEXT:    vxor.vv v9, v9, v11
-; CHECK-NEXT:    vxor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v9, v9, v13
-; CHECK-NEXT:    vxor.vv v9, v9, v14
-; CHECK-NEXT:    vxor.vv v9, v9, v15
-; CHECK-NEXT:    vxor.vv v9, v9, v16
-; CHECK-NEXT:    vxor.vv v9, v9, v17
-; CHECK-NEXT:    vxor.vv v9, v9, v18
-; CHECK-NEXT:    vxor.vv v9, v9, v19
-; CHECK-NEXT:    vxor.vv v9, v9, v20
-; CHECK-NEXT:    vxor.vv v9, v9, v21
-; CHECK-NEXT:    vxor.vv v9, v9, v22
-; CHECK-NEXT:    vxor.vv v9, v9, v23
-; CHECK-NEXT:    vxor.vv v9, v9, v24
-; CHECK-NEXT:    vxor.vv v9, v9, v25
-; CHECK-NEXT:    vxor.vv v9, v9, v26
-; CHECK-NEXT:    vxor.vv v9, v9, v27
-; CHECK-NEXT:    vxor.vv v9, v9, v28
-; CHECK-NEXT:    vxor.vv v9, v9, v29
-; CHECK-NEXT:    vxor.vv v9, v9, v30
-; CHECK-NEXT:    vxor.vv v9, v9, v31
-; CHECK-NEXT:    vxor.vv v9, v9, v7
-; CHECK-NEXT:    vxor.vv v9, v9, v6
-; CHECK-NEXT:    vxor.vv v9, v9, v5
-; CHECK-NEXT:    vxor.vv v9, v9, v0
-; CHECK-NEXT:    vxor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v10, v8, 24
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v10, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
-  %a = call <2 x i32> @llvm.clmulr.v2i32(<2 x i32> %x, <2 x i32> %y)
-  ret <2 x i32> %a
-}
-
-define <4 x i32> @clmulr_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: clmulr_v4i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    lui a4, 16
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vsll.vi v11, v8, 24
-; CHECK-NEXT:    lui a0, 61681
-; CHECK-NEXT:    lui a1, 209715
-; CHECK-NEXT:    lui a5, 349525
-; CHECK-NEXT:    li a6, 16
-; CHECK-NEXT:    addi a3, a4, -256
-; CHECK-NEXT:    addi a2, a0, -241
-; CHECK-NEXT:    addi a1, a1, 819
-; CHECK-NEXT:    addi a0, a5, 1365
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v11, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vand.vx v9, v8, a6
-; CHECK-NEXT:    li a5, 32
-; CHECK-NEXT:    vand.vx v10, v8, a5
-; CHECK-NEXT:    li a5, 64
-; CHECK-NEXT:    vand.vx v11, v8, a5
-; CHECK-NEXT:    li a5, 128
-; CHECK-NEXT:    vand.vx v12, v8, a5
-; CHECK-NEXT:    li a5, 256
-; CHECK-NEXT:    vand.vx v13, v8, a5
-; CHECK-NEXT:    li a5, 512
-; CHECK-NEXT:    vand.vx v14, v8, a5
-; CHECK-NEXT:    li a5, 1024
-; CHECK-NEXT:    vand.vx v15, v8, a5
-; CHECK-NEXT:    li a5, 1
-; CHECK-NEXT:    slli a5, a5, 11
-; CHECK-NEXT:    vand.vx v16, v8, a5
-; CHECK-NEXT:    lui a5, 1
-; CHECK-NEXT:    vand.vx v17, v8, a5
-; CHECK-NEXT:    lui a5, 2
-; CHECK-NEXT:    vand.vx v18, v8, a5
-; CHECK-NEXT:    lui a5, 4
-; CHECK-NEXT:    vand.vx v19, v8, a5
-; CHECK-NEXT:    lui a5, 8
-; CHECK-NEXT:    vand.vx v20, v8, a5
-; CHECK-NEXT:    lui a5, 32
-; CHECK-NEXT:    vand.vx v21, v8, a4
-; CHECK-NEXT:    lui a4, 64
-; CHECK-NEXT:    vand.vx v22, v8, a5
-; CHECK-NEXT:    lui a5, 128
-; CHECK-NEXT:    vand.vx v23, v8, a4
-; CHECK-NEXT:    lui a4, 256
-; CHECK-NEXT:    vand.vx v24, v8, a5
-; CHECK-NEXT:    lui a5, 512
-; CHECK-NEXT:    vand.vx v25, v8, a4
-; CHECK-NEXT:    lui a4, 1024
-; CHECK-NEXT:    vand.vx v26, v8, a5
-; CHECK-NEXT:    lui a5, 2048
-; CHECK-NEXT:    vand.vx v27, v8, a4
-; CHECK-NEXT:    lui a4, 4096
-; CHECK-NEXT:    vand.vx v28, v8, a5
-; CHECK-NEXT:    lui a5, 8192
-; CHECK-NEXT:    vand.vx v29, v8, a4
-; CHECK-NEXT:    lui a4, 16384
-; CHECK-NEXT:    vand.vx v30, v8, a5
-; CHECK-NEXT:    lui a5, 32768
-; CHECK-NEXT:    vand.vx v31, v8, a4
-; CHECK-NEXT:    lui a4, 65536
-; CHECK-NEXT:    vand.vx v7, v8, a5
-; CHECK-NEXT:    lui a5, 131072
-; CHECK-NEXT:    vand.vx v6, v8, a4
-; CHECK-NEXT:    lui a4, 262144
-; CHECK-NEXT:    vand.vx v5, v8, a5
-; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    vand.vi v4, v8, 2
-; CHECK-NEXT:    vand.vi v3, v8, 1
-; CHECK-NEXT:    vand.vi v2, v8, 4
-; CHECK-NEXT:    vand.vi v1, v8, 8
-; CHECK-NEXT:    vand.vx v0, v8, a4
-; CHECK-NEXT:    vmul.vv v4, v8, v4
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs1r.v v4, (a4) # vscale x 8-byte Folded Spill
-; CHECK-NEXT:    vmul.vv v3, v8, v3
-; CHECK-NEXT:    vmul.vv v2, v8, v2
-; CHECK-NEXT:    vmul.vv v1, v8, v1
-; CHECK-NEXT:    vmul.vv v9, v8, v9
-; CHECK-NEXT:    vmul.vv v10, v8, v10
-; CHECK-NEXT:    vmul.vv v11, v8, v11
-; CHECK-NEXT:    vmul.vv v12, v8, v12
-; CHECK-NEXT:    vmul.vv v13, v8, v13
-; CHECK-NEXT:    vmul.vv v14, v8, v14
-; CHECK-NEXT:    vmul.vv v15, v8, v15
-; CHECK-NEXT:    vmul.vv v16, v8, v16
-; CHECK-NEXT:    vmul.vv v17, v8, v17
-; CHECK-NEXT:    vmul.vv v18, v8, v18
-; CHECK-NEXT:    vmul.vv v19, v8, v19
-; CHECK-NEXT:    vmul.vv v20, v8, v20
-; CHECK-NEXT:    vmul.vv v21, v8, v21
-; CHECK-NEXT:    vmul.vv v22, v8, v22
-; CHECK-NEXT:    vmul.vv v23, v8, v23
-; CHECK-NEXT:    vmul.vv v24, v8, v24
-; CHECK-NEXT:    vmul.vv v25, v8, v25
-; CHECK-NEXT:    vmul.vv v26, v8, v26
-; CHECK-NEXT:    vmul.vv v27, v8, v27
-; CHECK-NEXT:    vmul.vv v28, v8, v28
-; CHECK-NEXT:    vmul.vv v29, v8, v29
-; CHECK-NEXT:    vmul.vv v30, v8, v30
-; CHECK-NEXT:    vmul.vv v31, v8, v31
-; CHECK-NEXT:    vmul.vv v7, v8, v7
-; CHECK-NEXT:    vmul.vv v6, v8, v6
-; CHECK-NEXT:    vmul.vv v5, v8, v5
-; CHECK-NEXT:    vmul.vv v0, v8, v0
-; CHECK-NEXT:    vand.vx v4, v8, a5
-; CHECK-NEXT:    vmul.vv v8, v8, v4
-; CHECK-NEXT:    vl1r.v v4, (a4) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vxor.vv v4, v3, v4
-; CHECK-NEXT:    vxor.vv v4, v4, v2
-; CHECK-NEXT:    vxor.vv v4, v4, v1
-; CHECK-NEXT:    vxor.vv v9, v4, v9
-; CHECK-NEXT:    vxor.vv v9, v9, v10
-; CHECK-NEXT:    vxor.vv v9, v9, v11
-; CHECK-NEXT:    vxor.vv v9, v9, v12
-; CHECK-NEXT:    vxor.vv v9, v9, v13
-; CHECK-NEXT:    vxor.vv v9, v9, v14
-; CHECK-NEXT:    vxor.vv v9, v9, v15
-; CHECK-NEXT:    vxor.vv v9, v9, v16
-; CHECK-NEXT:    vxor.vv v9, v9, v17
-; CHECK-NEXT:    vxor.vv v9, v9, v18
-; CHECK-NEXT:    vxor.vv v9, v9, v19
-; CHECK-NEXT:    vxor.vv v9, v9, v20
-; CHECK-NEXT:    vxor.vv v9, v9, v21
-; CHECK-NEXT:    vxor.vv v9, v9, v22
-; CHECK-NEXT:    vxor.vv v9, v9, v23
-; CHECK-NEXT:    vxor.vv v9, v9, v24
-; CHECK-NEXT:    vxor.vv v9, v9, v25
-; CHECK-NEXT:    vxor.vv v9, v9, v26
-; CHECK-NEXT:    vxor.vv v9, v9, v27
-; CHECK-NEXT:    vxor.vv v9, v9, v28
-; CHECK-NEXT:    vxor.vv v9, v9, v29
-; CHECK-NEXT:    vxor.vv v9, v9, v30
-; CHECK-NEXT:    vxor.vv v9, v9, v31
-; CHECK-NEXT:    vxor.vv v9, v9, v7
-; CHECK-NEXT:    vxor.vv v9, v9, v6
-; CHECK-NEXT:    vxor.vv v9, v9, v5
-; CHECK-NEXT:    vxor.vv v9, v9, v0
-; CHECK-NEXT:    vxor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 8
-; CHECK-NEXT:    vsrl.vi v10, v8, 24
-; CHECK-NEXT:    vand.vx v9, v9, a3
-; CHECK-NEXT:    vor.vv v9, v9, v10
-; CHECK-NEXT:    vsll.vi v10, v8, 24
-; CHECK-NEXT:    vand.vx v8, v8, a3
-; CHECK-NEXT:    vsll.vi v8, v8, 8
-; CHECK-NEXT:    vor.vv v8, v10, v8
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vand.vx v9, v9, a2
-; CHECK-NEXT:    vsll.vi v8, v8, 4
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsll.vi v8, v8, 2
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vor.vv v8, v9, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
-  %a = call <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %x, <4 x i32> %y)
-  ret <4 x i32> %a
-}
-
-define <8 x i32> @clmulr_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
-; RV32-LABEL: clmulr_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -64
-; RV32-NEXT:    sw s0, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vsll.vi v14, v8, 24
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    lui s6, 349525
-; RV32-NEXT:    li t2, 16
-; RV32-NEXT:    li t6, 32
-; RV32-NEXT:    li s3, 64
-; RV32-NEXT:    li s5, 128
-; RV32-NEXT:    li s4, 256
-; RV32-NEXT:    li s2, 512
-; RV32-NEXT:    li s1, 1024
-; RV32-NEXT:    li s0, 1
-; RV32-NEXT:    lui t5, 1
-; RV32-NEXT:    lui t4, 2
-; RV32-NEXT:    lui t3, 4
-; RV32-NEXT:    lui a5, 8
-; RV32-NEXT:    lui a6, 32
-; RV32-NEXT:    lui a7, 64
-; RV32-NEXT:    lui t0, 128
-; RV32-NEXT:    lui t1, 256
-; RV32-NEXT:    addi a4, a0, -256
-; RV32-NEXT:    addi a3, a1, -241
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    addi a1, s6, 1365
-; RV32-NEXT:    vand.vx v10, v10, a4
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v14, v8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v10, v10, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v10, v10, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v10, v10, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vand.vx v10, v8, t2
-; RV32-NEXT:    lui t2, 512
-; RV32-NEXT:    vand.vx v12, v8, t6
-; RV32-NEXT:    lui t6, 1024
-; RV32-NEXT:    vand.vx v14, v8, s3
-; RV32-NEXT:    lui s3, 2048
-; RV32-NEXT:    vand.vx v16, v8, s5
-; RV32-NEXT:    lui s5, 4096
-; RV32-NEXT:    vand.vx v26, v8, s4
-; RV32-NEXT:    lui s4, 8192
-; RV32-NEXT:    vand.vx v28, v8, s2
-; RV32-NEXT:    lui s2, 16384
-; RV32-NEXT:    vand.vx v18, v8, s1
-; RV32-NEXT:    lui s1, 32768
-; RV32-NEXT:    slli s0, s0, 11
-; RV32-NEXT:    vand.vx v20, v8, s0
-; RV32-NEXT:    lui s0, 65536
-; RV32-NEXT:    vand.vx v22, v8, t5
-; RV32-NEXT:    lui t5, 131072
-; RV32-NEXT:    vand.vx v24, v8, t4
-; RV32-NEXT:    lui t4, 262144
-; RV32-NEXT:    vand.vx v30, v8, t3
-; RV32-NEXT:    lui t3, 524288
-; RV32-NEXT:    vand.vi v6, v8, 2
-; RV32-NEXT:    vand.vi v4, v8, 1
-; RV32-NEXT:    vand.vi v2, v8, 4
-; RV32-NEXT:    vand.vi v0, v8, 8
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v6, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v6, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v6, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v26
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v18
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v22
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, s6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v30
-; RV32-NEXT:    csrr s6, vlenb
-; RV32-NEXT:    slli s6, s6, 1
-; RV32-NEXT:    mv a0, s6
-; RV32-NEXT:    slli s6, s6, 1
-; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s6, sp, s6
-; RV32-NEXT:    addi s6, s6, 32
-; RV32-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 32
-; RV32-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a7
-; RV32-NEXT:    vmul.vv v6, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t0
-; RV32-NEXT:    vmul.vv v30, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t1
-; RV32-NEXT:    vmul.vv v28, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t2
-; RV32-NEXT:    vmul.vv v26, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t6
-; RV32-NEXT:    vmul.vv v24, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s3
-; RV32-NEXT:    vmul.vv v22, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s5
-; RV32-NEXT:    vmul.vv v20, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s4
-; RV32-NEXT:    vmul.vv v18, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s2
-; RV32-NEXT:    vmul.vv v16, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s1
-; RV32-NEXT:    vmul.vv v14, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, s0
-; RV32-NEXT:    vmul.vv v12, v8, v10
-; RV32-NEXT:    vand.vx v10, v8, t5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    vand.vx v0, v8, t4
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    vand.vx v2, v8, t3
-; RV32-NEXT:    vmul.vv v8, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v4, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v2, v2, v4
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v4, v2, v4
-; RV32-NEXT:    vxor.vv v6, v4, v6
-; RV32-NEXT:    vxor.vv v30, v6, v30
-; RV32-NEXT:    vxor.vv v28, v30, v28
-; RV32-NEXT:    vxor.vv v26, v28, v26
-; RV32-NEXT:    vxor.vv v24, v26, v24
-; RV32-NEXT:    vxor.vv v22, v24, v22
-; RV32-NEXT:    vxor.vv v20, v22, v20
-; RV32-NEXT:    vxor.vv v18, v20, v18
-; RV32-NEXT:    vxor.vv v16, v18, v16
-; RV32-NEXT:    vxor.vv v14, v16, v14
-; RV32-NEXT:    vxor.vv v12, v14, v12
-; RV32-NEXT:    vxor.vv v10, v12, v10
-; RV32-NEXT:    vxor.vv v10, v10, v0
-; RV32-NEXT:    vxor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vand.vx v10, v10, a4
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v12, v8, 24
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v10, v10, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v10, v10, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v10, v10, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 64
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    sd s0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vsll.vi v14, v8, 24
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    lui s6, 349525
-; RV64-NEXT:    li t2, 16
-; RV64-NEXT:    li t6, 32
-; RV64-NEXT:    li s3, 64
-; RV64-NEXT:    li s5, 128
-; RV64-NEXT:    li s4, 256
-; RV64-NEXT:    li s2, 512
-; RV64-NEXT:    li s1, 1024
-; RV64-NEXT:    li s0, 1
-; RV64-NEXT:    lui t5, 1
-; RV64-NEXT:    lui t4, 2
-; RV64-NEXT:    lui t3, 4
-; RV64-NEXT:    lui a5, 8
-; RV64-NEXT:    lui a6, 32
-; RV64-NEXT:    lui a7, 64
-; RV64-NEXT:    lui t0, 128
-; RV64-NEXT:    lui t1, 256
-; RV64-NEXT:    addi a4, a0, -256
-; RV64-NEXT:    addi a3, a1, -241
-; RV64-NEXT:    addi a2, a2, 819
-; RV64-NEXT:    addi a1, s6, 1365
-; RV64-NEXT:    vand.vx v10, v10, a4
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v10, v10, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v10, v10, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v10, v10, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vand.vx v10, v8, t2
-; RV64-NEXT:    lui t2, 512
-; RV64-NEXT:    vand.vx v12, v8, t6
-; RV64-NEXT:    lui t6, 1024
-; RV64-NEXT:    vand.vx v14, v8, s3
-; RV64-NEXT:    lui s3, 2048
-; RV64-NEXT:    vand.vx v16, v8, s5
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v26, v8, s4
-; RV64-NEXT:    lui s4, 8192
-; RV64-NEXT:    vand.vx v28, v8, s2
-; RV64-NEXT:    lui s2, 16384
-; RV64-NEXT:    vand.vx v18, v8, s1
-; RV64-NEXT:    lui s1, 32768
-; RV64-NEXT:    slli s0, s0, 11
-; RV64-NEXT:    vand.vx v20, v8, s0
-; RV64-NEXT:    lui s0, 65536
-; RV64-NEXT:    vand.vx v22, v8, t5
-; RV64-NEXT:    lui t5, 131072
-; RV64-NEXT:    vand.vx v24, v8, t4
-; RV64-NEXT:    lui t4, 262144
-; RV64-NEXT:    vand.vx v30, v8, t3
-; RV64-NEXT:    lui t3, 524288
-; RV64-NEXT:    vand.vi v6, v8, 2
-; RV64-NEXT:    vand.vi v4, v8, 1
-; RV64-NEXT:    vand.vi v2, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v6, v8, v6
-; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v14
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v26
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v18
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v20
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v22
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv s6, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, s6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v30
-; RV64-NEXT:    csrr s6, vlenb
-; RV64-NEXT:    slli s6, s6, 1
-; RV64-NEXT:    mv a0, s6
-; RV64-NEXT:    slli s6, s6, 1
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s6, sp, s6
-; RV64-NEXT:    addi s6, s6, 32
-; RV64-NEXT:    vs2r.v v10, (s6) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a6
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, a7
-; RV64-NEXT:    vmul.vv v6, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t0
-; RV64-NEXT:    vmul.vv v30, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t1
-; RV64-NEXT:    vmul.vv v28, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t2
-; RV64-NEXT:    vmul.vv v26, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t6
-; RV64-NEXT:    vmul.vv v24, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s3
-; RV64-NEXT:    vmul.vv v22, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v20, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s4
-; RV64-NEXT:    vmul.vv v18, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s2
-; RV64-NEXT:    vmul.vv v16, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s1
-; RV64-NEXT:    vmul.vv v14, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s0
-; RV64-NEXT:    vmul.vv v12, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    vand.vx v0, v8, t4
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vand.vx v2, v8, t3
-; RV64-NEXT:    vmul.vv v8, v8, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v4, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v2, v4
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v4, v2, v4
-; RV64-NEXT:    vxor.vv v6, v4, v6
-; RV64-NEXT:    vxor.vv v30, v6, v30
-; RV64-NEXT:    vxor.vv v28, v30, v28
-; RV64-NEXT:    vxor.vv v26, v28, v26
-; RV64-NEXT:    vxor.vv v24, v26, v24
-; RV64-NEXT:    vxor.vv v22, v24, v22
-; RV64-NEXT:    vxor.vv v20, v22, v20
-; RV64-NEXT:    vxor.vv v18, v20, v18
-; RV64-NEXT:    vxor.vv v16, v18, v16
-; RV64-NEXT:    vxor.vv v14, v16, v14
-; RV64-NEXT:    vxor.vv v12, v14, v12
-; RV64-NEXT:    vxor.vv v10, v12, v10
-; RV64-NEXT:    vxor.vv v10, v10, v0
-; RV64-NEXT:    vxor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vand.vx v10, v10, a4
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vsll.vi v12, v8, 24
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v10, v10, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v10, v10, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v10, v10, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
-  %a = call <8 x i32> @llvm.clmulr.v8i32(<8 x i32> %x, <8 x i32> %x)
-  ret <8 x i32> %a
-}
-
-define <16 x i32> @clmulr_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
-; RV32-LABEL: clmulr_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -80
-; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a5, 16
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vsll.vi v20, v8, 24
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    lui ra, 349525
-; RV32-NEXT:    li s11, 16
-; RV32-NEXT:    li s10, 32
-; RV32-NEXT:    li s9, 64
-; RV32-NEXT:    li a7, 512
-; RV32-NEXT:    li t0, 1024
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    lui t1, 1
-; RV32-NEXT:    lui t2, 2
-; RV32-NEXT:    lui t3, 4
-; RV32-NEXT:    lui t4, 8
-; RV32-NEXT:    lui t5, 32
-; RV32-NEXT:    lui t6, 64
-; RV32-NEXT:    lui s0, 128
-; RV32-NEXT:    lui s1, 256
-; RV32-NEXT:    lui s2, 512
-; RV32-NEXT:    lui s3, 1024
-; RV32-NEXT:    lui s4, 2048
-; RV32-NEXT:    lui s5, 4096
-; RV32-NEXT:    lui s6, 8192
-; RV32-NEXT:    lui s7, 16384
-; RV32-NEXT:    lui s8, 32768
-; RV32-NEXT:    addi a4, a5, -256
-; RV32-NEXT:    addi a3, a1, -241
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    addi a1, ra, 1365
-; RV32-NEXT:    vand.vx v12, v12, a4
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v20, v8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v12, v12, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v12, v12, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v12, v12, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vand.vx v12, v8, s11
-; RV32-NEXT:    lui s11, 65536
-; RV32-NEXT:    vand.vx v16, v8, s10
-; RV32-NEXT:    lui s10, 131072
-; RV32-NEXT:    vand.vx v20, v8, s9
-; RV32-NEXT:    lui s9, 262144
-; RV32-NEXT:    slli ra, a0, 11
-; RV32-NEXT:    vand.vi v24, v8, 2
-; RV32-NEXT:    vand.vi v28, v8, 1
-; RV32-NEXT:    vand.vi v4, v8, 4
-; RV32-NEXT:    vand.vi v0, v8, 8
-; RV32-NEXT:    vmul.vv v24, v8, v24
-; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v24, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a4, a4, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    vand.vx v12, v8, a6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 3
-; RV32-NEXT:    mv a6, a4
-; RV32-NEXT:    slli a4, a4, 3
-; RV32-NEXT:    add a4, a4, a6
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a6, 256
-; RV32-NEXT:    vand.vx v12, v8, a6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a4, a6
-; RV32-NEXT:    slli a6, a6, 4
-; RV32-NEXT:    add a6, a6, a4
-; RV32-NEXT:    lw a4, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, a7
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 6
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, ra
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 4
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 1
-; RV32-NEXT:    add a7, a7, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a6, vlenb
-; RV32-NEXT:    slli a6, a6, 3
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    slli a6, a6, 2
-; RV32-NEXT:    add a6, a6, a7
-; RV32-NEXT:    add a6, sp, a6
-; RV32-NEXT:    addi a6, a6, 16
-; RV32-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, a5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 5
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a6, a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 4
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    mv a6, a5
-; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 2
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    addi a5, sp, 16
-; RV32-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s7
-; RV32-NEXT:    vmul.vv v28, v8, v12
-; RV32-NEXT:    vand.vx v12, v8, s8
-; RV32-NEXT:    vmul.vv v24, v8, v12
-; RV32-NEXT:    vand.vx v12, v8, s11
-; RV32-NEXT:    vmul.vv v20, v8, v12
-; RV32-NEXT:    vand.vx v12, v8, s10
-; RV32-NEXT:    vmul.vv v16, v8, v12
-; RV32-NEXT:    vand.vx v12, v8, s9
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    vand.vx v0, v8, a0
-; RV32-NEXT:    vmul.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v4, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a5, a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v0, v0, v4
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v4, v0, v4
-; RV32-NEXT:    vxor.vv v28, v4, v28
-; RV32-NEXT:    vxor.vv v24, v28, v24
-; RV32-NEXT:    vxor.vv v20, v24, v20
-; RV32-NEXT:    vxor.vv v16, v20, v16
-; RV32-NEXT:    vxor.vv v12, v16, v12
-; RV32-NEXT:    vxor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vand.vx v12, v12, a4
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v16, v8, 24
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vand.vx v12, v12, a3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vand.vx v12, v12, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vand.vx v12, v12, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 80
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -144
-; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a5, 16
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vsll.vi v20, v8, 24
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    lui ra, 349525
-; RV64-NEXT:    li s11, 16
-; RV64-NEXT:    li s10, 32
-; RV64-NEXT:    li s9, 64
-; RV64-NEXT:    li a7, 512
-; RV64-NEXT:    li t0, 1024
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    lui t1, 1
-; RV64-NEXT:    lui t2, 2
-; RV64-NEXT:    lui t3, 4
-; RV64-NEXT:    lui t4, 8
-; RV64-NEXT:    lui t5, 32
-; RV64-NEXT:    lui t6, 64
-; RV64-NEXT:    lui s0, 128
-; RV64-NEXT:    lui s1, 256
-; RV64-NEXT:    lui s2, 512
-; RV64-NEXT:    lui s3, 1024
-; RV64-NEXT:    lui s4, 2048
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    lui s6, 8192
-; RV64-NEXT:    lui s7, 16384
-; RV64-NEXT:    lui s8, 32768
-; RV64-NEXT:    addi a4, a5, -256
-; RV64-NEXT:    addi a3, a1, -241
-; RV64-NEXT:    addi a2, a2, 819
-; RV64-NEXT:    addi a1, ra, 1365
-; RV64-NEXT:    vand.vx v12, v12, a4
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v20, v8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v12, v12, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v12, v12, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v12, v12, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vand.vx v12, v8, s11
-; RV64-NEXT:    lui s11, 65536
-; RV64-NEXT:    vand.vx v16, v8, s10
-; RV64-NEXT:    lui s10, 131072
-; RV64-NEXT:    vand.vx v20, v8, s9
-; RV64-NEXT:    lui s9, 262144
-; RV64-NEXT:    slli ra, a0, 11
-; RV64-NEXT:    vand.vi v24, v8, 2
-; RV64-NEXT:    vand.vi v28, v8, 1
-; RV64-NEXT:    vand.vi v4, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v24, v8, v24
-; RV64-NEXT:    sd a4, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v28
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v16
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v20
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a4, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a4, a4, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a0, 524288
-; RV64-NEXT:    li a6, 128
-; RV64-NEXT:    vand.vx v12, v8, a6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 3
-; RV64-NEXT:    mv a6, a4
-; RV64-NEXT:    slli a4, a4, 3
-; RV64-NEXT:    add a4, a4, a6
-; RV64-NEXT:    add a4, sp, a4
-; RV64-NEXT:    addi a4, a4, 32
-; RV64-NEXT:    vs4r.v v12, (a4) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li a6, 256
-; RV64-NEXT:    vand.vx v12, v8, a6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a4, a6
-; RV64-NEXT:    slli a6, a6, 4
-; RV64-NEXT:    add a6, a6, a4
-; RV64-NEXT:    ld a4, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a7
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 6
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, ra
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 4
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 1
-; RV64-NEXT:    add a7, a7, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a6, vlenb
-; RV64-NEXT:    slli a6, a6, 3
-; RV64-NEXT:    mv a7, a6
-; RV64-NEXT:    slli a6, a6, 2
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    add a6, sp, a6
-; RV64-NEXT:    addi a6, a6, 32
-; RV64-NEXT:    vs4r.v v12, (a6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a6, a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv a6, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    addi a5, sp, 32
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s7
-; RV64-NEXT:    vmul.vv v28, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, s8
-; RV64-NEXT:    vmul.vv v24, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, s11
-; RV64-NEXT:    vmul.vv v20, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, s10
-; RV64-NEXT:    vmul.vv v16, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, s9
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    vand.vx v0, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v4, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a5, a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v4, v0, v4
-; RV64-NEXT:    vxor.vv v28, v4, v28
-; RV64-NEXT:    vxor.vv v24, v28, v24
-; RV64-NEXT:    vxor.vv v20, v24, v20
-; RV64-NEXT:    vxor.vv v16, v20, v16
-; RV64-NEXT:    vxor.vv v12, v16, v12
-; RV64-NEXT:    vxor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vand.vx v12, v12, a4
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vsll.vi v16, v8, 24
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vand.vx v12, v12, a3
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vand.vx v12, v12, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vand.vx v12, v12, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 144
-; RV64-NEXT:    ret
-  %a = call <16 x i32> @llvm.clmulr.v16i32(<16 x i32> %x, <16 x i32> %y)
-  ret <16 x i32> %a
-}
-
-define <1 x i64> @clmulr_v1i64(<1 x i64> %x, <1 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_v1i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s7, 1044480
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    li s11, 1
-; RV32-NEXT:    li s8, 2
-; RV32-NEXT:    li s9, 4
-; RV32-NEXT:    li s10, 8
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    li a4, 32
-; RV32-NEXT:    li a5, 64
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    li ra, 256
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    li a1, 1024
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    lui t0, 2
-; RV32-NEXT:    lui t1, 4
-; RV32-NEXT:    lui t2, 8
-; RV32-NEXT:    lui t3, 16
-; RV32-NEXT:    lui t4, 32
-; RV32-NEXT:    lui t5, 64
-; RV32-NEXT:    lui t6, 128
-; RV32-NEXT:    lui s0, 256
-; RV32-NEXT:    lui s1, 512
-; RV32-NEXT:    lui s2, 1024
-; RV32-NEXT:    lui s3, 2048
-; RV32-NEXT:    lui s4, 4096
-; RV32-NEXT:    lui s5, 8192
-; RV32-NEXT:    lui s6, 16384
-; RV32-NEXT:    sw s7, 272(sp)
-; RV32-NEXT:    lui s7, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw a7, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw s11, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw s8, 252(sp)
-; RV32-NEXT:    lui s8, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw s9, 244(sp)
-; RV32-NEXT:    lui s9, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw s10, 236(sp)
-; RV32-NEXT:    lui s10, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw a3, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw a4, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw a5, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw a6, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw ra, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw a0, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a1, 180(sp)
-; RV32-NEXT:    slli s11, s11, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw s11, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a2, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw t0, 156(sp)
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw t1, 148(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw t2, 140(sp)
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw t3, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw t4, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw t5, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw t6, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s0, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s1, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s2, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s3, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s4, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s5, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s6, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s7, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw s8, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw s9, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw s10, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw a7, 12(sp)
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v3, a0
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vmv.v.x v2, a0
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vmv.v.x v1, a0
-; RV32-NEXT:    addi a0, sp, 272
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a0), zero
-; RV32-NEXT:    addi a0, sp, 264
-; RV32-NEXT:    vlse64.v v13, (a0), zero
-; RV32-NEXT:    addi a0, sp, 256
-; RV32-NEXT:    vlse64.v v14, (a0), zero
-; RV32-NEXT:    addi a0, sp, 248
-; RV32-NEXT:    vlse64.v v15, (a0), zero
-; RV32-NEXT:    addi a0, sp, 240
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v17, (a0), zero
-; RV32-NEXT:    addi a0, sp, 224
-; RV32-NEXT:    vlse64.v v18, (a0), zero
-; RV32-NEXT:    addi a0, sp, 216
-; RV32-NEXT:    vlse64.v v19, (a0), zero
-; RV32-NEXT:    addi a0, sp, 208
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    addi a0, sp, 200
-; RV32-NEXT:    vlse64.v v21, (a0), zero
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vlse64.v v22, (a0), zero
-; RV32-NEXT:    addi a0, sp, 184
-; RV32-NEXT:    vlse64.v v23, (a0), zero
-; RV32-NEXT:    addi a0, sp, 176
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    addi a0, sp, 168
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    addi a0, sp, 152
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    addi a0, sp, 144
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    addi a0, sp, 136
-; RV32-NEXT:    vlse64.v v29, (a0), zero
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    vlse64.v v30, (a0), zero
-; RV32-NEXT:    addi a0, sp, 120
-; RV32-NEXT:    vlse64.v v31, (a0), zero
-; RV32-NEXT:    addi a0, sp, 112
-; RV32-NEXT:    vlse64.v v11, (a0), zero
-; RV32-NEXT:    addi a0, sp, 104
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    vlse64.v v5, (a0), zero
-; RV32-NEXT:    addi a0, sp, 88
-; RV32-NEXT:    vlse64.v v4, (a0), zero
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    vsrl.vi v27, v8, 24
-; RV32-NEXT:    vsrl.vx v28, v8, a6
-; RV32-NEXT:    li ra, 40
-; RV32-NEXT:    vsrl.vx v7, v8, ra
-; RV32-NEXT:    vsll.vx v6, v8, a6
-; RV32-NEXT:    addi a4, t3, -256
-; RV32-NEXT:    vand.vx v7, v7, a4
-; RV32-NEXT:    vor.vv v28, v7, v28
-; RV32-NEXT:    vand.vx v7, v8, a4
-; RV32-NEXT:    vsll.vx v7, v7, ra
-; RV32-NEXT:    vor.vv v7, v6, v7
-; RV32-NEXT:    vsrl.vi v6, v8, 8
-; RV32-NEXT:    lui a5, 4080
-; RV32-NEXT:    vand.vx v27, v27, a5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v6, v6, v0
-; RV32-NEXT:    vor.vv v27, v6, v27
-; RV32-NEXT:    addi a3, sp, 80
-; RV32-NEXT:    vlse64.v v6, (a3), zero
-; RV32-NEXT:    vor.vv v27, v27, v28
-; RV32-NEXT:    vand.vx v28, v8, a5
-; RV32-NEXT:    vsll.vi v28, v28, 24
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v28, v8
-; RV32-NEXT:    addi a3, sp, 72
-; RV32-NEXT:    vlse64.v v28, (a3), zero
-; RV32-NEXT:    vor.vv v8, v7, v8
-; RV32-NEXT:    addi a3, sp, 64
-; RV32-NEXT:    vlse64.v v7, (a3), zero
-; RV32-NEXT:    vor.vv v8, v8, v27
-; RV32-NEXT:    vsrl.vi v27, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v3
-; RV32-NEXT:    vand.vv v27, v27, v3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    vsrl.vi v27, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v2
-; RV32-NEXT:    vand.vv v27, v27, v2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    vsrl.vi v27, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v1
-; RV32-NEXT:    vand.vv v27, v27, v1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    addi a3, sp, 56
-; RV32-NEXT:    vlse64.v v27, (a3), zero
-; RV32-NEXT:    vand.vv v13, v8, v13
-; RV32-NEXT:    vand.vv v14, v8, v14
-; RV32-NEXT:    vand.vv v15, v8, v15
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    vand.vv v17, v8, v17
-; RV32-NEXT:    vand.vv v18, v8, v18
-; RV32-NEXT:    vand.vv v19, v8, v19
-; RV32-NEXT:    vand.vv v20, v8, v20
-; RV32-NEXT:    vand.vv v21, v8, v21
-; RV32-NEXT:    vand.vv v22, v8, v22
-; RV32-NEXT:    vand.vv v23, v8, v23
-; RV32-NEXT:    vand.vv v24, v8, v24
-; RV32-NEXT:    vand.vv v25, v8, v25
-; RV32-NEXT:    vand.vv v26, v8, v26
-; RV32-NEXT:    vand.vv v3, v8, v9
-; RV32-NEXT:    vand.vv v2, v8, v10
-; RV32-NEXT:    vand.vv v29, v8, v29
-; RV32-NEXT:    vand.vv v30, v8, v30
-; RV32-NEXT:    vand.vv v31, v8, v31
-; RV32-NEXT:    vand.vv v0, v8, v11
-; RV32-NEXT:    vand.vv v9, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v5, v8, v5
-; RV32-NEXT:    vand.vv v4, v8, v4
-; RV32-NEXT:    vand.vv v6, v8, v6
-; RV32-NEXT:    vand.vv v9, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    addi a0, sp, 40
-; RV32-NEXT:    vlse64.v v9, (a3), zero
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vand.vv v11, v8, v7
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v11, v8, v27
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a2, sp, 32
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a2), zero
-; RV32-NEXT:    vlse64.v v10, (a3), zero
-; RV32-NEXT:    vlse64.v v11, (a1), zero
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vand.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v11
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 8
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s11
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t3
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t5
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t6
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s3
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s5
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 2
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s6
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s7
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s8
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s9
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v1, v8, s10
-; RV32-NEXT:    vmul.vv v1, v8, v1
-; RV32-NEXT:    vmul.vv v9, v8, v13
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v14
-; RV32-NEXT:    vmul.vv v11, v8, v15
-; RV32-NEXT:    vmul.vv v12, v8, v16
-; RV32-NEXT:    vmul.vv v13, v8, v17
-; RV32-NEXT:    vmul.vv v14, v8, v18
-; RV32-NEXT:    vmul.vv v15, v8, v19
-; RV32-NEXT:    vmul.vv v16, v8, v20
-; RV32-NEXT:    vmul.vv v17, v8, v21
-; RV32-NEXT:    vmul.vv v18, v8, v22
-; RV32-NEXT:    vmul.vv v19, v8, v23
-; RV32-NEXT:    vmul.vv v20, v8, v24
-; RV32-NEXT:    vmul.vv v21, v8, v25
-; RV32-NEXT:    vmul.vv v22, v8, v26
-; RV32-NEXT:    vmul.vv v23, v8, v3
-; RV32-NEXT:    vmul.vv v24, v8, v2
-; RV32-NEXT:    vmul.vv v25, v8, v29
-; RV32-NEXT:    vmul.vv v26, v8, v30
-; RV32-NEXT:    vmul.vv v27, v8, v31
-; RV32-NEXT:    vmul.vv v28, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v29, v8, v29
-; RV32-NEXT:    vmul.vv v30, v8, v5
-; RV32-NEXT:    vmul.vv v31, v8, v4
-; RV32-NEXT:    vmul.vv v7, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v5, v8, v5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v3, v8, v3
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v2, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 2
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vxor.vv v8, v8, v1
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vxor.vv v8, v8, v11
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v13
-; RV32-NEXT:    vxor.vv v8, v8, v14
-; RV32-NEXT:    vxor.vv v8, v8, v15
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v17
-; RV32-NEXT:    vxor.vv v8, v8, v18
-; RV32-NEXT:    vxor.vv v8, v8, v19
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v21
-; RV32-NEXT:    vxor.vv v8, v8, v22
-; RV32-NEXT:    vxor.vv v8, v8, v23
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v25
-; RV32-NEXT:    vxor.vv v8, v8, v26
-; RV32-NEXT:    vxor.vv v8, v8, v27
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v29
-; RV32-NEXT:    vxor.vv v8, v8, v30
-; RV32-NEXT:    vxor.vv v8, v8, v31
-; RV32-NEXT:    vxor.vv v8, v8, v7
-; RV32-NEXT:    vxor.vv v8, v8, v6
-; RV32-NEXT:    vxor.vv v8, v8, v5
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v3
-; RV32-NEXT:    vxor.vv v8, v8, v2
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vx v9, v8, a6
-; RV32-NEXT:    vsll.vx v10, v8, a6
-; RV32-NEXT:    vsrl.vx v11, v8, ra
-; RV32-NEXT:    vand.vx v12, v8, a4
-; RV32-NEXT:    vand.vx v11, v11, a4
-; RV32-NEXT:    vsrl.vi v13, v8, 24
-; RV32-NEXT:    vand.vx v14, v8, a5
-; RV32-NEXT:    vand.vx v13, v13, a5
-; RV32-NEXT:    vsll.vx v12, v12, ra
-; RV32-NEXT:    vsrl.vi v15, v8, 8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v15, v15, v16
-; RV32-NEXT:    vor.vv v9, v11, v9
-; RV32-NEXT:    vor.vv v11, v15, v13
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vsll.vi v13, v14, 24
-; RV32-NEXT:    vor.vv v8, v13, v8
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vor.vv v9, v11, v9
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_v1i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -224
-; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li s11, 56
-; RV64-NEXT:    li ra, 40
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    li t2, 255
-; RV64-NEXT:    lui t6, 61681
-; RV64-NEXT:    lui s0, 209715
-; RV64-NEXT:    lui s1, 349525
-; RV64-NEXT:    li s10, 16
-; RV64-NEXT:    li s9, 32
-; RV64-NEXT:    li s8, 64
-; RV64-NEXT:    li s7, 128
-; RV64-NEXT:    li s5, 256
-; RV64-NEXT:    li t5, 512
-; RV64-NEXT:    li t3, 1024
-; RV64-NEXT:    li t0, 1
-; RV64-NEXT:    lui s6, 1
-; RV64-NEXT:    lui s4, 2
-; RV64-NEXT:    lui t4, 4
-; RV64-NEXT:    lui t1, 8
-; RV64-NEXT:    lui a7, 32
-; RV64-NEXT:    lui a6, 64
-; RV64-NEXT:    lui a5, 128
-; RV64-NEXT:    lui a4, 256
-; RV64-NEXT:    lui a3, 512
-; RV64-NEXT:    lui a2, 1024
-; RV64-NEXT:    vsrl.vx v11, v8, s11
-; RV64-NEXT:    vsrl.vx v12, v8, ra
-; RV64-NEXT:    addi t6, t6, -241
-; RV64-NEXT:    addi s2, s0, 819
-; RV64-NEXT:    addi s3, s1, 1365
-; RV64-NEXT:    slli s1, t6, 32
-; RV64-NEXT:    add s1, t6, s1
-; RV64-NEXT:    slli t6, s2, 32
-; RV64-NEXT:    add s2, s2, t6
-; RV64-NEXT:    slli t6, s3, 32
-; RV64-NEXT:    add s3, s3, t6
-; RV64-NEXT:    addi s0, a0, -256
-; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    lui a0, 4080
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    slli t6, t2, 24
-; RV64-NEXT:    vand.vx v13, v8, a0
-; RV64-NEXT:    vsll.vx v14, v8, s11
-; RV64-NEXT:    vand.vx v12, v12, s0
-; RV64-NEXT:    vand.vx v9, v9, t6
-; RV64-NEXT:    vsll.vi v13, v13, 24
-; RV64-NEXT:    vand.vx v15, v8, t6
-; RV64-NEXT:    vand.vx v8, v8, s0
-; RV64-NEXT:    vor.vv v11, v12, v11
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vsll.vi v10, v15, 8
-; RV64-NEXT:    vsll.vx v8, v8, ra
-; RV64-NEXT:    vor.vv v9, v9, v11
-; RV64-NEXT:    vor.vv v10, v13, v10
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, s1
-; RV64-NEXT:    vand.vx v9, v9, s1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vand.vx v9, v9, s2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, s3
-; RV64-NEXT:    vand.vx v9, v9, s3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vand.vx v9, v8, s10
-; RV64-NEXT:    lui t2, 4096
-; RV64-NEXT:    vand.vx v10, v8, s9
-; RV64-NEXT:    lui s9, 8192
-; RV64-NEXT:    vand.vx v11, v8, s8
-; RV64-NEXT:    lui s8, 16384
-; RV64-NEXT:    vand.vx v12, v8, s7
-; RV64-NEXT:    lui s10, 32768
-; RV64-NEXT:    vand.vx v13, v8, s5
-; RV64-NEXT:    lui s11, 65536
-; RV64-NEXT:    vand.vx v14, v8, t5
-; RV64-NEXT:    lui t5, 131072
-; RV64-NEXT:    vand.vx v15, v8, t3
-; RV64-NEXT:    slli t3, t0, 11
-; RV64-NEXT:    vand.vx v16, v8, t3
-; RV64-NEXT:    lui t3, 262144
-; RV64-NEXT:    vand.vx v17, v8, s6
-; RV64-NEXT:    slli a0, t0, 31
-; RV64-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v18, v8, s4
-; RV64-NEXT:    slli a0, t0, 32
-; RV64-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v19, v8, t4
-; RV64-NEXT:    slli a0, t0, 33
-; RV64-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v20, v8, t1
-; RV64-NEXT:    slli a0, t0, 34
-; RV64-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v21, v8, a1
-; RV64-NEXT:    slli a0, t0, 35
-; RV64-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v22, v8, a7
-; RV64-NEXT:    slli a0, t0, 36
-; RV64-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v23, v8, a6
-; RV64-NEXT:    slli a0, t0, 37
-; RV64-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v24, v8, a5
-; RV64-NEXT:    slli a0, t0, 38
-; RV64-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v25, v8, a4
-; RV64-NEXT:    slli a0, t0, 39
-; RV64-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v26, v8, a3
-; RV64-NEXT:    slli a0, t0, 40
-; RV64-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v27, v8, a2
-; RV64-NEXT:    slli a0, t0, 41
-; RV64-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    lui a0, 2048
-; RV64-NEXT:    vand.vx v28, v8, a0
-; RV64-NEXT:    slli s5, t0, 42
-; RV64-NEXT:    vand.vx v29, v8, t2
-; RV64-NEXT:    slli s6, t0, 43
-; RV64-NEXT:    vand.vx v30, v8, s9
-; RV64-NEXT:    slli s7, t0, 44
-; RV64-NEXT:    vand.vx v31, v8, s8
-; RV64-NEXT:    slli s8, t0, 45
-; RV64-NEXT:    vand.vx v7, v8, s10
-; RV64-NEXT:    slli s9, t0, 46
-; RV64-NEXT:    vand.vx v6, v8, s11
-; RV64-NEXT:    slli s10, t0, 47
-; RV64-NEXT:    vand.vx v5, v8, t5
-; RV64-NEXT:    slli s11, t0, 48
-; RV64-NEXT:    vand.vx v0, v8, t3
-; RV64-NEXT:    slli ra, t0, 49
-; RV64-NEXT:    slli t5, t0, 50
-; RV64-NEXT:    slli t4, t0, 51
-; RV64-NEXT:    slli t3, t0, 52
-; RV64-NEXT:    slli t2, t0, 53
-; RV64-NEXT:    slli t1, t0, 54
-; RV64-NEXT:    slli a7, t0, 55
-; RV64-NEXT:    slli a6, t0, 56
-; RV64-NEXT:    slli a5, t0, 57
-; RV64-NEXT:    slli a4, t0, 58
-; RV64-NEXT:    slli a3, t0, 59
-; RV64-NEXT:    slli a2, t0, 60
-; RV64-NEXT:    slli a1, t0, 61
-; RV64-NEXT:    slli t0, t0, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v4, v8, 2
-; RV64-NEXT:    vand.vi v3, v8, 1
-; RV64-NEXT:    vand.vi v2, v8, 4
-; RV64-NEXT:    vand.vi v1, v8, 8
-; RV64-NEXT:    vmul.vv v4, v8, v4
-; RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 5
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v4, v8, v3
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 5
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v4, v8, v2
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 5
-; RV64-NEXT:    sub t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v4, v8, v1
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v10
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v11
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v12
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v13
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v14
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v15
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v16
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v17
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v18
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v19
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v20
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v21
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 4
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v22
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v23
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 4
-; RV64-NEXT:    sub t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v24
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v25
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v26
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v27
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v28
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v29
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 3
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v30
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v31
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 3
-; RV64-NEXT:    sub t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v7
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v6
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 2
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v5
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v0
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    slli t6, s4, 1
-; RV64-NEXT:    add s4, t6, s4
-; RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    slli s4, s4, 1
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    addi s4, sp, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v3, v8, v9
-; RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v4, v8, v9
-; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v5, v8, v9
-; RV64-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v6, v8, v9
-; RV64-NEXT:    ld s4, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v7, v8, v9
-; RV64-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v31, v8, v9
-; RV64-NEXT:    ld s4, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v30, v8, v9
-; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v29, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s5
-; RV64-NEXT:    vmul.vv v28, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s6
-; RV64-NEXT:    vmul.vv v27, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s7
-; RV64-NEXT:    vmul.vv v26, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s8
-; RV64-NEXT:    vmul.vv v25, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s9
-; RV64-NEXT:    vmul.vv v24, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s10
-; RV64-NEXT:    vmul.vv v23, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s11
-; RV64-NEXT:    vmul.vv v22, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, ra
-; RV64-NEXT:    vmul.vv v21, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t5
-; RV64-NEXT:    vmul.vv v20, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t4
-; RV64-NEXT:    vmul.vv v19, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t3
-; RV64-NEXT:    vmul.vv v18, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t2
-; RV64-NEXT:    vmul.vv v17, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t1
-; RV64-NEXT:    vmul.vv v16, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a7
-; RV64-NEXT:    vmul.vv v15, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a6
-; RV64-NEXT:    vmul.vv v14, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a5
-; RV64-NEXT:    vmul.vv v13, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a4
-; RV64-NEXT:    vmul.vv v12, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a3
-; RV64-NEXT:    vmul.vv v11, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a2
-; RV64-NEXT:    vmul.vv v10, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a1
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    vand.vx v0, v8, t0
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vand.vx v1, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 5
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 5
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 4
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 4
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 3
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 3
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 2
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 1
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    addi a0, sp, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v1, v2
-; RV64-NEXT:    vxor.vv v3, v2, v3
-; RV64-NEXT:    vxor.vv v4, v3, v4
-; RV64-NEXT:    vxor.vv v5, v4, v5
-; RV64-NEXT:    vxor.vv v6, v5, v6
-; RV64-NEXT:    vxor.vv v7, v6, v7
-; RV64-NEXT:    vxor.vv v31, v7, v31
-; RV64-NEXT:    vxor.vv v30, v31, v30
-; RV64-NEXT:    vxor.vv v29, v30, v29
-; RV64-NEXT:    vxor.vv v28, v29, v28
-; RV64-NEXT:    vxor.vv v27, v28, v27
-; RV64-NEXT:    vxor.vv v26, v27, v26
-; RV64-NEXT:    vxor.vv v25, v26, v25
-; RV64-NEXT:    vxor.vv v24, v25, v24
-; RV64-NEXT:    vxor.vv v23, v24, v23
-; RV64-NEXT:    vxor.vv v22, v23, v22
-; RV64-NEXT:    vxor.vv v21, v22, v21
-; RV64-NEXT:    vxor.vv v20, v21, v20
-; RV64-NEXT:    vxor.vv v19, v20, v19
-; RV64-NEXT:    vxor.vv v18, v19, v18
-; RV64-NEXT:    vxor.vv v17, v18, v17
-; RV64-NEXT:    vxor.vv v16, v17, v16
-; RV64-NEXT:    vxor.vv v15, v16, v15
-; RV64-NEXT:    vxor.vv v14, v15, v14
-; RV64-NEXT:    vxor.vv v13, v14, v13
-; RV64-NEXT:    vxor.vv v12, v13, v12
-; RV64-NEXT:    vxor.vv v11, v12, v11
-; RV64-NEXT:    vxor.vv v10, v11, v10
-; RV64-NEXT:    vxor.vv v9, v10, v9
-; RV64-NEXT:    vxor.vv v9, v9, v0
-; RV64-NEXT:    vxor.vv v8, v9, v8
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v9, v8, a0
-; RV64-NEXT:    li a1, 40
-; RV64-NEXT:    vsrl.vx v10, v8, a1
-; RV64-NEXT:    vsrl.vi v11, v8, 24
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vand.vx v10, v10, s0
-; RV64-NEXT:    vor.vv v9, v10, v9
-; RV64-NEXT:    vand.vx v10, v8, t6
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v11, v11, a2
-; RV64-NEXT:    vand.vx v12, v12, t6
-; RV64-NEXT:    vor.vv v11, v12, v11
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v12, v12, 24
-; RV64-NEXT:    vor.vv v10, v12, v10
-; RV64-NEXT:    vsll.vx v12, v8, a0
-; RV64-NEXT:    vand.vx v8, v8, s0
-; RV64-NEXT:    vsll.vx v8, v8, a1
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v9, v11, v9
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, s1
-; RV64-NEXT:    vand.vx v9, v9, s1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vand.vx v9, v9, s2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, s3
-; RV64-NEXT:    vand.vx v9, v9, s3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 224
-; RV64-NEXT:    ret
-  %a = call <1 x i64> @llvm.clmulr.v1i64(<1 x i64> %x, <1 x i64> %y)
-  ret <1 x i64> %a
-}
-
-define <2 x i64> @clmulr_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s7, 1044480
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    li s11, 1
-; RV32-NEXT:    li s8, 2
-; RV32-NEXT:    li s9, 4
-; RV32-NEXT:    li s10, 8
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    li a4, 32
-; RV32-NEXT:    li a5, 64
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    li ra, 256
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    li a1, 1024
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    lui t0, 2
-; RV32-NEXT:    lui t1, 4
-; RV32-NEXT:    lui t2, 8
-; RV32-NEXT:    lui t3, 16
-; RV32-NEXT:    lui t4, 32
-; RV32-NEXT:    lui t5, 64
-; RV32-NEXT:    lui t6, 128
-; RV32-NEXT:    lui s0, 256
-; RV32-NEXT:    lui s1, 512
-; RV32-NEXT:    lui s2, 1024
-; RV32-NEXT:    lui s3, 2048
-; RV32-NEXT:    lui s4, 4096
-; RV32-NEXT:    lui s5, 8192
-; RV32-NEXT:    lui s6, 16384
-; RV32-NEXT:    sw s7, 272(sp)
-; RV32-NEXT:    lui s7, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw a7, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw s11, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw s8, 252(sp)
-; RV32-NEXT:    lui s8, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw s9, 244(sp)
-; RV32-NEXT:    lui s9, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw s10, 236(sp)
-; RV32-NEXT:    lui s10, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw a3, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw a4, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw a5, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw a6, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw ra, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw a0, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a1, 180(sp)
-; RV32-NEXT:    slli s11, s11, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw s11, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a2, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw t0, 156(sp)
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw t1, 148(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw t2, 140(sp)
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw t3, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw t4, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw t5, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw t6, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s0, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s1, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s2, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s3, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s4, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s5, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s6, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s7, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw s8, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw s9, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw s10, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw a7, 12(sp)
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v3, a0
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vmv.v.x v2, a0
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vmv.v.x v1, a0
-; RV32-NEXT:    addi a0, sp, 272
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a0), zero
-; RV32-NEXT:    addi a0, sp, 264
-; RV32-NEXT:    vlse64.v v13, (a0), zero
-; RV32-NEXT:    addi a0, sp, 256
-; RV32-NEXT:    vlse64.v v14, (a0), zero
-; RV32-NEXT:    addi a0, sp, 248
-; RV32-NEXT:    vlse64.v v15, (a0), zero
-; RV32-NEXT:    addi a0, sp, 240
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v17, (a0), zero
-; RV32-NEXT:    addi a0, sp, 224
-; RV32-NEXT:    vlse64.v v18, (a0), zero
-; RV32-NEXT:    addi a0, sp, 216
-; RV32-NEXT:    vlse64.v v19, (a0), zero
-; RV32-NEXT:    addi a0, sp, 208
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    addi a0, sp, 200
-; RV32-NEXT:    vlse64.v v21, (a0), zero
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vlse64.v v22, (a0), zero
-; RV32-NEXT:    addi a0, sp, 184
-; RV32-NEXT:    vlse64.v v23, (a0), zero
-; RV32-NEXT:    addi a0, sp, 176
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    addi a0, sp, 168
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    addi a0, sp, 152
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    addi a0, sp, 144
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    addi a0, sp, 136
-; RV32-NEXT:    vlse64.v v29, (a0), zero
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    vlse64.v v30, (a0), zero
-; RV32-NEXT:    addi a0, sp, 120
-; RV32-NEXT:    vlse64.v v31, (a0), zero
-; RV32-NEXT:    addi a0, sp, 112
-; RV32-NEXT:    vlse64.v v11, (a0), zero
-; RV32-NEXT:    addi a0, sp, 104
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    vlse64.v v5, (a0), zero
-; RV32-NEXT:    addi a0, sp, 88
-; RV32-NEXT:    vlse64.v v4, (a0), zero
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    vsrl.vi v27, v8, 24
-; RV32-NEXT:    vsrl.vx v28, v8, a6
-; RV32-NEXT:    li ra, 40
-; RV32-NEXT:    vsrl.vx v7, v8, ra
-; RV32-NEXT:    vsll.vx v6, v8, a6
-; RV32-NEXT:    addi a4, t3, -256
-; RV32-NEXT:    vand.vx v7, v7, a4
-; RV32-NEXT:    vor.vv v28, v7, v28
-; RV32-NEXT:    vand.vx v7, v8, a4
-; RV32-NEXT:    vsll.vx v7, v7, ra
-; RV32-NEXT:    vor.vv v7, v6, v7
-; RV32-NEXT:    vsrl.vi v6, v8, 8
-; RV32-NEXT:    lui a5, 4080
-; RV32-NEXT:    vand.vx v27, v27, a5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v6, v6, v0
-; RV32-NEXT:    vor.vv v27, v6, v27
-; RV32-NEXT:    addi a3, sp, 80
-; RV32-NEXT:    vlse64.v v6, (a3), zero
-; RV32-NEXT:    vor.vv v27, v27, v28
-; RV32-NEXT:    vand.vx v28, v8, a5
-; RV32-NEXT:    vsll.vi v28, v28, 24
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v28, v8
-; RV32-NEXT:    addi a3, sp, 72
-; RV32-NEXT:    vlse64.v v28, (a3), zero
-; RV32-NEXT:    vor.vv v8, v7, v8
-; RV32-NEXT:    addi a3, sp, 64
-; RV32-NEXT:    vlse64.v v7, (a3), zero
-; RV32-NEXT:    vor.vv v8, v8, v27
-; RV32-NEXT:    vsrl.vi v27, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v3
-; RV32-NEXT:    vand.vv v27, v27, v3
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    vsrl.vi v27, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v2
-; RV32-NEXT:    vand.vv v27, v27, v2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    vsrl.vi v27, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v1
-; RV32-NEXT:    vand.vv v27, v27, v1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v27, v8
-; RV32-NEXT:    addi a3, sp, 56
-; RV32-NEXT:    vlse64.v v27, (a3), zero
-; RV32-NEXT:    vand.vv v13, v8, v13
-; RV32-NEXT:    vand.vv v14, v8, v14
-; RV32-NEXT:    vand.vv v15, v8, v15
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    vand.vv v17, v8, v17
-; RV32-NEXT:    vand.vv v18, v8, v18
-; RV32-NEXT:    vand.vv v19, v8, v19
-; RV32-NEXT:    vand.vv v20, v8, v20
-; RV32-NEXT:    vand.vv v21, v8, v21
-; RV32-NEXT:    vand.vv v22, v8, v22
-; RV32-NEXT:    vand.vv v23, v8, v23
-; RV32-NEXT:    vand.vv v24, v8, v24
-; RV32-NEXT:    vand.vv v25, v8, v25
-; RV32-NEXT:    vand.vv v26, v8, v26
-; RV32-NEXT:    vand.vv v3, v8, v9
-; RV32-NEXT:    vand.vv v2, v8, v10
-; RV32-NEXT:    vand.vv v29, v8, v29
-; RV32-NEXT:    vand.vv v30, v8, v30
-; RV32-NEXT:    vand.vv v31, v8, v31
-; RV32-NEXT:    vand.vv v0, v8, v11
-; RV32-NEXT:    vand.vv v9, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v5, v8, v5
-; RV32-NEXT:    vand.vv v4, v8, v4
-; RV32-NEXT:    vand.vv v6, v8, v6
-; RV32-NEXT:    vand.vv v9, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    addi a0, sp, 40
-; RV32-NEXT:    vlse64.v v9, (a3), zero
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vand.vv v11, v8, v7
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v11, v8, v27
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a2, sp, 32
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a2), zero
-; RV32-NEXT:    vlse64.v v10, (a3), zero
-; RV32-NEXT:    vlse64.v v11, (a1), zero
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vand.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v11
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vv v9, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vi v9, v8, 8
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s11
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t3
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t5
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, t6
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s0
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s1
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s2
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s3
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s4
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s5
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 2
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s6
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s7
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s8
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v9, v8, s9
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vand.vx v1, v8, s10
-; RV32-NEXT:    vmul.vv v1, v8, v1
-; RV32-NEXT:    vmul.vv v9, v8, v13
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vmul.vv v10, v8, v14
-; RV32-NEXT:    vmul.vv v11, v8, v15
-; RV32-NEXT:    vmul.vv v12, v8, v16
-; RV32-NEXT:    vmul.vv v13, v8, v17
-; RV32-NEXT:    vmul.vv v14, v8, v18
-; RV32-NEXT:    vmul.vv v15, v8, v19
-; RV32-NEXT:    vmul.vv v16, v8, v20
-; RV32-NEXT:    vmul.vv v17, v8, v21
-; RV32-NEXT:    vmul.vv v18, v8, v22
-; RV32-NEXT:    vmul.vv v19, v8, v23
-; RV32-NEXT:    vmul.vv v20, v8, v24
-; RV32-NEXT:    vmul.vv v21, v8, v25
-; RV32-NEXT:    vmul.vv v22, v8, v26
-; RV32-NEXT:    vmul.vv v23, v8, v3
-; RV32-NEXT:    vmul.vv v24, v8, v2
-; RV32-NEXT:    vmul.vv v25, v8, v29
-; RV32-NEXT:    vmul.vv v26, v8, v30
-; RV32-NEXT:    vmul.vv v27, v8, v31
-; RV32-NEXT:    vmul.vv v28, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v29, v8, v29
-; RV32-NEXT:    vmul.vv v30, v8, v5
-; RV32-NEXT:    vmul.vv v31, v8, v4
-; RV32-NEXT:    vmul.vv v7, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v5, v8, v5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v3, v8, v3
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v2, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 5
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v9, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 4
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 3
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 2
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vxor.vv v8, v8, v1
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vxor.vv v8, v8, v11
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v13
-; RV32-NEXT:    vxor.vv v8, v8, v14
-; RV32-NEXT:    vxor.vv v8, v8, v15
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v17
-; RV32-NEXT:    vxor.vv v8, v8, v18
-; RV32-NEXT:    vxor.vv v8, v8, v19
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v21
-; RV32-NEXT:    vxor.vv v8, v8, v22
-; RV32-NEXT:    vxor.vv v8, v8, v23
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v25
-; RV32-NEXT:    vxor.vv v8, v8, v26
-; RV32-NEXT:    vxor.vv v8, v8, v27
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v29
-; RV32-NEXT:    vxor.vv v8, v8, v30
-; RV32-NEXT:    vxor.vv v8, v8, v31
-; RV32-NEXT:    vxor.vv v8, v8, v7
-; RV32-NEXT:    vxor.vv v8, v8, v6
-; RV32-NEXT:    vxor.vv v8, v8, v5
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v3
-; RV32-NEXT:    vxor.vv v8, v8, v2
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vx v9, v8, a6
-; RV32-NEXT:    vsll.vx v10, v8, a6
-; RV32-NEXT:    vsrl.vx v11, v8, ra
-; RV32-NEXT:    vand.vx v12, v8, a4
-; RV32-NEXT:    vand.vx v11, v11, a4
-; RV32-NEXT:    vsrl.vi v13, v8, 24
-; RV32-NEXT:    vand.vx v14, v8, a5
-; RV32-NEXT:    vand.vx v13, v13, a5
-; RV32-NEXT:    vsll.vx v12, v12, ra
-; RV32-NEXT:    vsrl.vi v15, v8, 8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v15, v15, v16
-; RV32-NEXT:    vor.vv v9, v11, v9
-; RV32-NEXT:    vor.vv v11, v15, v13
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vsll.vi v13, v14, 24
-; RV32-NEXT:    vor.vv v8, v13, v8
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vor.vv v9, v11, v9
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -224
-; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li s11, 56
-; RV64-NEXT:    li ra, 40
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    li t2, 255
-; RV64-NEXT:    lui t6, 61681
-; RV64-NEXT:    lui s0, 209715
-; RV64-NEXT:    lui s1, 349525
-; RV64-NEXT:    li s10, 16
-; RV64-NEXT:    li s9, 32
-; RV64-NEXT:    li s8, 64
-; RV64-NEXT:    li s7, 128
-; RV64-NEXT:    li s5, 256
-; RV64-NEXT:    li t5, 512
-; RV64-NEXT:    li t3, 1024
-; RV64-NEXT:    li t0, 1
-; RV64-NEXT:    lui s6, 1
-; RV64-NEXT:    lui s4, 2
-; RV64-NEXT:    lui t4, 4
-; RV64-NEXT:    lui t1, 8
-; RV64-NEXT:    lui a7, 32
-; RV64-NEXT:    lui a6, 64
-; RV64-NEXT:    lui a5, 128
-; RV64-NEXT:    lui a4, 256
-; RV64-NEXT:    lui a3, 512
-; RV64-NEXT:    lui a2, 1024
-; RV64-NEXT:    vsrl.vx v11, v8, s11
-; RV64-NEXT:    vsrl.vx v12, v8, ra
-; RV64-NEXT:    addi t6, t6, -241
-; RV64-NEXT:    addi s2, s0, 819
-; RV64-NEXT:    addi s3, s1, 1365
-; RV64-NEXT:    slli s1, t6, 32
-; RV64-NEXT:    add s1, t6, s1
-; RV64-NEXT:    slli t6, s2, 32
-; RV64-NEXT:    add s2, s2, t6
-; RV64-NEXT:    slli t6, s3, 32
-; RV64-NEXT:    add s3, s3, t6
-; RV64-NEXT:    addi s0, a0, -256
-; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    lui a0, 4080
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    slli t6, t2, 24
-; RV64-NEXT:    vand.vx v13, v8, a0
-; RV64-NEXT:    vsll.vx v14, v8, s11
-; RV64-NEXT:    vand.vx v12, v12, s0
-; RV64-NEXT:    vand.vx v9, v9, t6
-; RV64-NEXT:    vsll.vi v13, v13, 24
-; RV64-NEXT:    vand.vx v15, v8, t6
-; RV64-NEXT:    vand.vx v8, v8, s0
-; RV64-NEXT:    vor.vv v11, v12, v11
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vsll.vi v10, v15, 8
-; RV64-NEXT:    vsll.vx v8, v8, ra
-; RV64-NEXT:    vor.vv v9, v9, v11
-; RV64-NEXT:    vor.vv v10, v13, v10
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, s1
-; RV64-NEXT:    vand.vx v9, v9, s1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vand.vx v9, v9, s2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, s3
-; RV64-NEXT:    vand.vx v9, v9, s3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vand.vx v9, v8, s10
-; RV64-NEXT:    lui t2, 4096
-; RV64-NEXT:    vand.vx v10, v8, s9
-; RV64-NEXT:    lui s9, 8192
-; RV64-NEXT:    vand.vx v11, v8, s8
-; RV64-NEXT:    lui s8, 16384
-; RV64-NEXT:    vand.vx v12, v8, s7
-; RV64-NEXT:    lui s10, 32768
-; RV64-NEXT:    vand.vx v13, v8, s5
-; RV64-NEXT:    lui s11, 65536
-; RV64-NEXT:    vand.vx v14, v8, t5
-; RV64-NEXT:    lui t5, 131072
-; RV64-NEXT:    vand.vx v15, v8, t3
-; RV64-NEXT:    slli t3, t0, 11
-; RV64-NEXT:    vand.vx v16, v8, t3
-; RV64-NEXT:    lui t3, 262144
-; RV64-NEXT:    vand.vx v17, v8, s6
-; RV64-NEXT:    slli a0, t0, 31
-; RV64-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v18, v8, s4
-; RV64-NEXT:    slli a0, t0, 32
-; RV64-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v19, v8, t4
-; RV64-NEXT:    slli a0, t0, 33
-; RV64-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v20, v8, t1
-; RV64-NEXT:    slli a0, t0, 34
-; RV64-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v21, v8, a1
-; RV64-NEXT:    slli a0, t0, 35
-; RV64-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v22, v8, a7
-; RV64-NEXT:    slli a0, t0, 36
-; RV64-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v23, v8, a6
-; RV64-NEXT:    slli a0, t0, 37
-; RV64-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v24, v8, a5
-; RV64-NEXT:    slli a0, t0, 38
-; RV64-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v25, v8, a4
-; RV64-NEXT:    slli a0, t0, 39
-; RV64-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v26, v8, a3
-; RV64-NEXT:    slli a0, t0, 40
-; RV64-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v27, v8, a2
-; RV64-NEXT:    slli a0, t0, 41
-; RV64-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    lui a0, 2048
-; RV64-NEXT:    vand.vx v28, v8, a0
-; RV64-NEXT:    slli s5, t0, 42
-; RV64-NEXT:    vand.vx v29, v8, t2
-; RV64-NEXT:    slli s6, t0, 43
-; RV64-NEXT:    vand.vx v30, v8, s9
-; RV64-NEXT:    slli s7, t0, 44
-; RV64-NEXT:    vand.vx v31, v8, s8
-; RV64-NEXT:    slli s8, t0, 45
-; RV64-NEXT:    vand.vx v7, v8, s10
-; RV64-NEXT:    slli s9, t0, 46
-; RV64-NEXT:    vand.vx v6, v8, s11
-; RV64-NEXT:    slli s10, t0, 47
-; RV64-NEXT:    vand.vx v5, v8, t5
-; RV64-NEXT:    slli s11, t0, 48
-; RV64-NEXT:    vand.vx v0, v8, t3
-; RV64-NEXT:    slli ra, t0, 49
-; RV64-NEXT:    slli t5, t0, 50
-; RV64-NEXT:    slli t4, t0, 51
-; RV64-NEXT:    slli t3, t0, 52
-; RV64-NEXT:    slli t2, t0, 53
-; RV64-NEXT:    slli t1, t0, 54
-; RV64-NEXT:    slli a7, t0, 55
-; RV64-NEXT:    slli a6, t0, 56
-; RV64-NEXT:    slli a5, t0, 57
-; RV64-NEXT:    slli a4, t0, 58
-; RV64-NEXT:    slli a3, t0, 59
-; RV64-NEXT:    slli a2, t0, 60
-; RV64-NEXT:    slli a1, t0, 61
-; RV64-NEXT:    slli t0, t0, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v4, v8, 2
-; RV64-NEXT:    vand.vi v3, v8, 1
-; RV64-NEXT:    vand.vi v2, v8, 4
-; RV64-NEXT:    vand.vi v1, v8, 8
-; RV64-NEXT:    vmul.vv v4, v8, v4
-; RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 5
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v4, v8, v3
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 5
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v4, v8, v2
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 5
-; RV64-NEXT:    sub t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v4, v8, v1
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v4, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v10
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v11
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v12
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v13
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v14
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v15
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v16
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v17
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v18
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v19
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v20
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v21
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 4
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v22
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v23
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 4
-; RV64-NEXT:    sub t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v24
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v25
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v26
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v27
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add s4, s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v28
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v29
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 3
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v30
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v31
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 3
-; RV64-NEXT:    sub t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v7
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    mv s4, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s4
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v6
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli s4, t6, 2
-; RV64-NEXT:    add t6, s4, t6
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v5
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 2
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 112
-; RV64-NEXT:    vs1r.v v9, (t6) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    vmul.vv v9, v8, v0
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    slli t6, s4, 1
-; RV64-NEXT:    add s4, t6, s4
-; RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    slli s4, s4, 1
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    csrr s4, vlenb
-; RV64-NEXT:    add s4, sp, s4
-; RV64-NEXT:    addi s4, s4, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    addi s4, sp, 112
-; RV64-NEXT:    vs1r.v v9, (s4) # vscale x 8-byte Folded Spill
-; RV64-NEXT:    ld s4, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v3, v8, v9
-; RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v4, v8, v9
-; RV64-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v5, v8, v9
-; RV64-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v6, v8, v9
-; RV64-NEXT:    ld s4, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v7, v8, v9
-; RV64-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v31, v8, v9
-; RV64-NEXT:    ld s4, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v30, v8, v9
-; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v9, v8, s4
-; RV64-NEXT:    vmul.vv v29, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s5
-; RV64-NEXT:    vmul.vv v28, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s6
-; RV64-NEXT:    vmul.vv v27, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s7
-; RV64-NEXT:    vmul.vv v26, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s8
-; RV64-NEXT:    vmul.vv v25, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s9
-; RV64-NEXT:    vmul.vv v24, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s10
-; RV64-NEXT:    vmul.vv v23, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, s11
-; RV64-NEXT:    vmul.vv v22, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, ra
-; RV64-NEXT:    vmul.vv v21, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t5
-; RV64-NEXT:    vmul.vv v20, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t4
-; RV64-NEXT:    vmul.vv v19, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t3
-; RV64-NEXT:    vmul.vv v18, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t2
-; RV64-NEXT:    vmul.vv v17, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, t1
-; RV64-NEXT:    vmul.vv v16, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a7
-; RV64-NEXT:    vmul.vv v15, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a6
-; RV64-NEXT:    vmul.vv v14, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a5
-; RV64-NEXT:    vmul.vv v13, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a4
-; RV64-NEXT:    vmul.vv v12, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a3
-; RV64-NEXT:    vmul.vv v11, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a2
-; RV64-NEXT:    vmul.vv v10, v8, v9
-; RV64-NEXT:    vand.vx v9, v8, a1
-; RV64-NEXT:    vmul.vv v9, v8, v9
-; RV64-NEXT:    vand.vx v0, v8, t0
-; RV64-NEXT:    vmul.vv v0, v8, v0
-; RV64-NEXT:    vand.vx v1, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 5
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v2, v1
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 5
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 4
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 4
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 3
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 3
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 2
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a1, a0, 1
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v1, v1, v2
-; RV64-NEXT:    addi a0, sp, 112
-; RV64-NEXT:    vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v1, v2
-; RV64-NEXT:    vxor.vv v3, v2, v3
-; RV64-NEXT:    vxor.vv v4, v3, v4
-; RV64-NEXT:    vxor.vv v5, v4, v5
-; RV64-NEXT:    vxor.vv v6, v5, v6
-; RV64-NEXT:    vxor.vv v7, v6, v7
-; RV64-NEXT:    vxor.vv v31, v7, v31
-; RV64-NEXT:    vxor.vv v30, v31, v30
-; RV64-NEXT:    vxor.vv v29, v30, v29
-; RV64-NEXT:    vxor.vv v28, v29, v28
-; RV64-NEXT:    vxor.vv v27, v28, v27
-; RV64-NEXT:    vxor.vv v26, v27, v26
-; RV64-NEXT:    vxor.vv v25, v26, v25
-; RV64-NEXT:    vxor.vv v24, v25, v24
-; RV64-NEXT:    vxor.vv v23, v24, v23
-; RV64-NEXT:    vxor.vv v22, v23, v22
-; RV64-NEXT:    vxor.vv v21, v22, v21
-; RV64-NEXT:    vxor.vv v20, v21, v20
-; RV64-NEXT:    vxor.vv v19, v20, v19
-; RV64-NEXT:    vxor.vv v18, v19, v18
-; RV64-NEXT:    vxor.vv v17, v18, v17
-; RV64-NEXT:    vxor.vv v16, v17, v16
-; RV64-NEXT:    vxor.vv v15, v16, v15
-; RV64-NEXT:    vxor.vv v14, v15, v14
-; RV64-NEXT:    vxor.vv v13, v14, v13
-; RV64-NEXT:    vxor.vv v12, v13, v12
-; RV64-NEXT:    vxor.vv v11, v12, v11
-; RV64-NEXT:    vxor.vv v10, v11, v10
-; RV64-NEXT:    vxor.vv v9, v10, v9
-; RV64-NEXT:    vxor.vv v9, v9, v0
-; RV64-NEXT:    vxor.vv v8, v9, v8
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v9, v8, a0
-; RV64-NEXT:    li a1, 40
-; RV64-NEXT:    vsrl.vx v10, v8, a1
-; RV64-NEXT:    vsrl.vi v11, v8, 24
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vand.vx v10, v10, s0
-; RV64-NEXT:    vor.vv v9, v10, v9
-; RV64-NEXT:    vand.vx v10, v8, t6
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v11, v11, a2
-; RV64-NEXT:    vand.vx v12, v12, t6
-; RV64-NEXT:    vor.vv v11, v12, v11
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v12, v12, 24
-; RV64-NEXT:    vor.vv v10, v12, v10
-; RV64-NEXT:    vsll.vx v12, v8, a0
-; RV64-NEXT:    vand.vx v8, v8, s0
-; RV64-NEXT:    vsll.vx v8, v8, a1
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v9, v11, v9
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, s1
-; RV64-NEXT:    vand.vx v9, v9, s1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, s2
-; RV64-NEXT:    vand.vx v9, v9, s2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, s3
-; RV64-NEXT:    vand.vx v9, v9, s3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 224
-; RV64-NEXT:    ret
-  %a = call <2 x i64> @llvm.clmulr.v2i64(<2 x i64> %x, <2 x i64> %y)
-  ret <2 x i64> %a
-}
-
-define <4 x i64> @clmulr_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_v4i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s7, 1044480
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    li s8, 2
-; RV32-NEXT:    li s9, 4
-; RV32-NEXT:    li s10, 8
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    li a4, 32
-; RV32-NEXT:    li a5, 64
-; RV32-NEXT:    li a6, 128
-; RV32-NEXT:    li s11, 256
-; RV32-NEXT:    li ra, 512
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    lui t0, 2
-; RV32-NEXT:    lui t1, 4
-; RV32-NEXT:    lui t2, 8
-; RV32-NEXT:    lui t3, 16
-; RV32-NEXT:    lui t4, 32
-; RV32-NEXT:    lui t5, 64
-; RV32-NEXT:    lui t6, 128
-; RV32-NEXT:    lui s0, 256
-; RV32-NEXT:    lui s1, 512
-; RV32-NEXT:    lui s2, 1024
-; RV32-NEXT:    lui s3, 2048
-; RV32-NEXT:    lui s4, 4096
-; RV32-NEXT:    lui s5, 8192
-; RV32-NEXT:    lui s6, 16384
-; RV32-NEXT:    sw s7, 272(sp)
-; RV32-NEXT:    lui s7, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw a7, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw a1, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw s8, 252(sp)
-; RV32-NEXT:    lui s8, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw s9, 244(sp)
-; RV32-NEXT:    lui s9, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw s10, 236(sp)
-; RV32-NEXT:    lui s10, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw a3, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw a4, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw a5, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw a6, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw s11, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw ra, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a0, 180(sp)
-; RV32-NEXT:    slli a5, a1, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw a5, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a2, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw t0, 156(sp)
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw t1, 148(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw t2, 140(sp)
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw t3, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw t4, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw t5, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw t6, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s0, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s1, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s2, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s3, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s4, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s5, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s6, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s7, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw s8, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw s9, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw s10, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw a7, 12(sp)
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v4, a0
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vmv.v.x v2, a0
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vmv.v.x v0, a0
-; RV32-NEXT:    addi a0, sp, 272
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v6, (a0), zero
-; RV32-NEXT:    addi a0, sp, 264
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 256
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    addi a0, sp, 248
-; RV32-NEXT:    vlse64.v v14, (a0), zero
-; RV32-NEXT:    addi a0, sp, 240
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v18, (a0), zero
-; RV32-NEXT:    addi a0, sp, 224
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    addi a0, sp, 216
-; RV32-NEXT:    vlse64.v v22, (a0), zero
-; RV32-NEXT:    li ra, 56
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vsrl.vx v26, v8, ra
-; RV32-NEXT:    li s11, 40
-; RV32-NEXT:    vsrl.vx v28, v8, s11
-; RV32-NEXT:    vsll.vx v30, v8, ra
-; RV32-NEXT:    addi a4, t3, -256
-; RV32-NEXT:    vand.vx v28, v28, a4
-; RV32-NEXT:    vor.vv v26, v28, v26
-; RV32-NEXT:    vand.vx v28, v8, a4
-; RV32-NEXT:    vsll.vx v28, v28, s11
-; RV32-NEXT:    vor.vv v30, v30, v28
-; RV32-NEXT:    vsrl.vi v28, v8, 8
-; RV32-NEXT:    lui a6, 4080
-; RV32-NEXT:    vand.vx v24, v24, a6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v6, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v28, v28, v6
-; RV32-NEXT:    vor.vv v28, v28, v24
-; RV32-NEXT:    addi a3, sp, 208
-; RV32-NEXT:    vlse64.v v24, (a3), zero
-; RV32-NEXT:    vor.vv v10, v28, v26
-; RV32-NEXT:    vand.vx v26, v8, a6
-; RV32-NEXT:    vsll.vi v26, v26, 24
-; RV32-NEXT:    vand.vv v8, v8, v6
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v26, v8
-; RV32-NEXT:    addi a3, sp, 200
-; RV32-NEXT:    vlse64.v v28, (a3), zero
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    addi a3, sp, 192
-; RV32-NEXT:    vlse64.v v26, (a3), zero
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v30, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v4
-; RV32-NEXT:    vand.vv v30, v30, v4
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    vsrl.vi v30, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v2, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v2
-; RV32-NEXT:    vand.vv v30, v30, v2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    vsrl.vi v30, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v0, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vand.vv v30, v30, v0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v30, v8
-; RV32-NEXT:    addi a3, sp, 184
-; RV32-NEXT:    vlse64.v v30, (a3), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v6, v8, v10
-; RV32-NEXT:    vand.vv v4, v8, v12
-; RV32-NEXT:    vand.vv v2, v8, v14
-; RV32-NEXT:    vand.vv v0, v8, v16
-; RV32-NEXT:    vand.vv v10, v8, v18
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v22
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v28, v8, v28
-; RV32-NEXT:    addi a3, sp, 176
-; RV32-NEXT:    addi a0, sp, 168
-; RV32-NEXT:    vlse64.v v10, (a3), zero
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vand.vv v14, v8, v26
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v14, v8, v30
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a2, sp, 160
-; RV32-NEXT:    addi a3, sp, 152
-; RV32-NEXT:    addi a1, sp, 144
-; RV32-NEXT:    addi a0, sp, 136
-; RV32-NEXT:    vlse64.v v10, (a2), zero
-; RV32-NEXT:    vlse64.v v12, (a3), zero
-; RV32-NEXT:    vlse64.v v14, (a1), zero
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    addi a1, sp, 120
-; RV32-NEXT:    addi a2, sp, 112
-; RV32-NEXT:    addi a3, sp, 104
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    addi a1, sp, 88
-; RV32-NEXT:    addi a2, sp, 80
-; RV32-NEXT:    addi a3, sp, 72
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    addi a1, sp, 56
-; RV32-NEXT:    addi a2, sp, 48
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    addi a3, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vlse64.v v14, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    vand.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v14
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vv v10, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 2
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 1
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 4
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vi v10, v8, 8
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    li a0, 1024
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, a5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t1
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t2
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t3
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t4
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, t6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s0
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s1
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s2
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s3
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s4
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s5
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s6
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s7
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s8
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s9
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vand.vx v10, v8, s10
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vmul.vv v12, v8, v6
-; RV32-NEXT:    vmul.vv v14, v8, v4
-; RV32-NEXT:    vmul.vv v16, v8, v2
-; RV32-NEXT:    vmul.vv v18, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v20, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v20, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v22, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v22, v8, v22
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v24, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v26, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v26, v8, v26
-; RV32-NEXT:    vmul.vv v28, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v30, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v30, v8, v30
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v6, v8, v6
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v4, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v2, v8, v2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v10, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v14
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v18
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v22
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v26
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v30
-; RV32-NEXT:    vxor.vv v8, v8, v6
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v2
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vx v10, v8, ra
-; RV32-NEXT:    vsll.vx v12, v8, ra
-; RV32-NEXT:    vsrl.vx v14, v8, s11
-; RV32-NEXT:    vand.vx v16, v8, a4
-; RV32-NEXT:    vand.vx v14, v14, a4
-; RV32-NEXT:    vsrl.vi v18, v8, 24
-; RV32-NEXT:    vand.vx v20, v8, a6
-; RV32-NEXT:    vand.vx v18, v18, a6
-; RV32-NEXT:    vsll.vx v16, v16, s11
-; RV32-NEXT:    vsrl.vi v22, v8, 8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v24, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v22, v22, v24
-; RV32-NEXT:    vor.vv v10, v14, v10
-; RV32-NEXT:    vor.vv v14, v22, v18
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vsll.vi v18, v20, 24
-; RV32-NEXT:    vor.vv v8, v18, v8
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vor.vv v10, v14, v10
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_v4i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -224
-; RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li a7, 56
-; RV64-NEXT:    li s1, 40
-; RV64-NEXT:    lui s3, 16
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v14, v8, 24
-; RV64-NEXT:    lui t6, 4080
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    li s2, 255
-; RV64-NEXT:    lui a5, 61681
-; RV64-NEXT:    lui a6, 209715
-; RV64-NEXT:    lui t5, 349525
-; RV64-NEXT:    li t4, 16
-; RV64-NEXT:    li t3, 32
-; RV64-NEXT:    li t2, 64
-; RV64-NEXT:    li t1, 128
-; RV64-NEXT:    li t0, 256
-; RV64-NEXT:    li a4, 512
-; RV64-NEXT:    li a3, 1024
-; RV64-NEXT:    li s0, 1
-; RV64-NEXT:    lui a2, 1
-; RV64-NEXT:    lui a1, 2
-; RV64-NEXT:    lui a0, 4
-; RV64-NEXT:    vsrl.vx v12, v8, a7
-; RV64-NEXT:    vsrl.vx v18, v8, s1
-; RV64-NEXT:    addi s4, s3, -256
-; RV64-NEXT:    vand.vx v16, v14, t6
-; RV64-NEXT:    slli s2, s2, 24
-; RV64-NEXT:    vand.vx v20, v8, t6
-; RV64-NEXT:    vsll.vx v14, v8, a7
-; RV64-NEXT:    addi a7, a5, -241
-; RV64-NEXT:    addi a6, a6, 819
-; RV64-NEXT:    addi a5, t5, 1365
-; RV64-NEXT:    slli t5, s0, 11
-; RV64-NEXT:    slli t6, s0, 31
-; RV64-NEXT:    sd t6, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli t6, s0, 32
-; RV64-NEXT:    sd t6, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli t6, s0, 33
-; RV64-NEXT:    sd t6, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli t6, s0, 34
-; RV64-NEXT:    sd t6, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli t6, s0, 35
-; RV64-NEXT:    sd t6, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli t6, s0, 36
-; RV64-NEXT:    sd t6, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli t6, a7, 32
-; RV64-NEXT:    add a7, a7, t6
-; RV64-NEXT:    slli t6, a6, 32
-; RV64-NEXT:    add a6, a6, t6
-; RV64-NEXT:    slli t6, a5, 32
-; RV64-NEXT:    add a5, a5, t6
-; RV64-NEXT:    slli t6, s0, 37
-; RV64-NEXT:    sd t6, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v18, v18, s4
-; RV64-NEXT:    vand.vx v10, v10, s2
-; RV64-NEXT:    vsll.vi v20, v20, 24
-; RV64-NEXT:    vand.vx v22, v8, s2
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vor.vv v12, v18, v12
-; RV64-NEXT:    vor.vv v10, v10, v16
-; RV64-NEXT:    vsll.vi v16, v22, 8
-; RV64-NEXT:    vsll.vx v8, v8, s1
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vor.vv v12, v20, v16
-; RV64-NEXT:    vor.vv v8, v14, v8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v10, v10, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v10, v10, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v10, v10, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vand.vx v10, v8, t4
-; RV64-NEXT:    slli t4, s0, 38
-; RV64-NEXT:    sd t4, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    slli t3, s0, 39
-; RV64-NEXT:    sd t3, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v14, v8, t2
-; RV64-NEXT:    slli t2, s0, 40
-; RV64-NEXT:    sd t2, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v16, v8, t1
-; RV64-NEXT:    slli t1, s0, 41
-; RV64-NEXT:    sd t1, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vand.vx v18, v8, t0
-; RV64-NEXT:    slli s6, s0, 42
-; RV64-NEXT:    vand.vx v20, v8, a4
-; RV64-NEXT:    slli s7, s0, 43
-; RV64-NEXT:    vand.vx v22, v8, a3
-; RV64-NEXT:    slli s8, s0, 44
-; RV64-NEXT:    vand.vx v24, v8, t5
-; RV64-NEXT:    slli s9, s0, 45
-; RV64-NEXT:    vand.vx v26, v8, a2
-; RV64-NEXT:    slli s10, s0, 46
-; RV64-NEXT:    vand.vx v28, v8, a1
-; RV64-NEXT:    slli s11, s0, 47
-; RV64-NEXT:    vand.vx v30, v8, a0
-; RV64-NEXT:    slli ra, s0, 48
-; RV64-NEXT:    slli s3, s0, 49
-; RV64-NEXT:    slli s1, s0, 50
-; RV64-NEXT:    slli t6, s0, 51
-; RV64-NEXT:    slli t5, s0, 52
-; RV64-NEXT:    slli t4, s0, 53
-; RV64-NEXT:    slli t3, s0, 54
-; RV64-NEXT:    slli t2, s0, 55
-; RV64-NEXT:    slli t1, s0, 56
-; RV64-NEXT:    slli t0, s0, 57
-; RV64-NEXT:    slli a4, s0, 58
-; RV64-NEXT:    slli a3, s0, 59
-; RV64-NEXT:    slli a2, s0, 60
-; RV64-NEXT:    slli a1, s0, 61
-; RV64-NEXT:    slli s0, s0, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v6, v8, 2
-; RV64-NEXT:    vand.vi v4, v8, 1
-; RV64-NEXT:    vand.vi v2, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v6, v8, v6
-; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v4
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v2
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v6, v8, v0
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v6, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v14
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v16
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v18
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v20
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v22
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v24
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v26
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v28
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vmul.vv v10, v8, v30
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 8
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 16
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 32
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 64
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 128
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 256
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 512
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 1024
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 2048
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 8192
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 16384
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 32768
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 65536
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 131072
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    lui s5, 262144
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 112
-; RV64-NEXT:    vs2r.v v10, (a5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv a5, s5
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    ld s5, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v10, v8, s5
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s6
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s7
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s8
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s9
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s10
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s11
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, ra
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 112
-; RV64-NEXT:    vs2r.v v10, (s5) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s3
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    addi s3, sp, 112
-; RV64-NEXT:    vs2r.v v10, (s3) # vscale x 16-byte Folded Spill
-; RV64-NEXT:    vand.vx v10, v8, s1
-; RV64-NEXT:    vmul.vv v4, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t6
-; RV64-NEXT:    vmul.vv v6, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t5
-; RV64-NEXT:    vmul.vv v30, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t4
-; RV64-NEXT:    vmul.vv v28, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t3
-; RV64-NEXT:    vmul.vv v26, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t2
-; RV64-NEXT:    vmul.vv v24, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t1
-; RV64-NEXT:    vmul.vv v22, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, t0
-; RV64-NEXT:    vmul.vv v20, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, a4
-; RV64-NEXT:    vmul.vv v18, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, a3
-; RV64-NEXT:    vmul.vv v16, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, a2
-; RV64-NEXT:    vmul.vv v14, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, a1
-; RV64-NEXT:    vmul.vv v12, v8, v10
-; RV64-NEXT:    vand.vx v10, v8, s0
-; RV64-NEXT:    vmul.vv v10, v8, v10
-; RV64-NEXT:    vand.vx v0, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v0, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v2, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v2
-; RV64-NEXT:    addi a0, sp, 112
-; RV64-NEXT:    vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
-; RV64-NEXT:    vxor.vv v2, v0, v2
-; RV64-NEXT:    vxor.vv v4, v2, v4
-; RV64-NEXT:    vxor.vv v6, v4, v6
-; RV64-NEXT:    vxor.vv v30, v6, v30
-; RV64-NEXT:    vxor.vv v28, v30, v28
-; RV64-NEXT:    vxor.vv v26, v28, v26
-; RV64-NEXT:    vxor.vv v24, v26, v24
-; RV64-NEXT:    vxor.vv v22, v24, v22
-; RV64-NEXT:    vxor.vv v20, v22, v20
-; RV64-NEXT:    vxor.vv v18, v20, v18
-; RV64-NEXT:    vxor.vv v16, v18, v16
-; RV64-NEXT:    vxor.vv v14, v16, v14
-; RV64-NEXT:    vxor.vv v12, v14, v12
-; RV64-NEXT:    vxor.vv v10, v12, v10
-; RV64-NEXT:    vxor.vv v8, v10, v8
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v10, v8, a0
-; RV64-NEXT:    li a1, 40
-; RV64-NEXT:    vsrl.vx v12, v8, a1
-; RV64-NEXT:    vsrl.vi v14, v8, 24
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vand.vx v12, v12, s4
-; RV64-NEXT:    vor.vv v10, v12, v10
-; RV64-NEXT:    vand.vx v12, v8, s2
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v14, v14, a2
-; RV64-NEXT:    vand.vx v16, v16, s2
-; RV64-NEXT:    vor.vv v14, v16, v14
-; RV64-NEXT:    vand.vx v16, v8, a2
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v16, v16, 24
-; RV64-NEXT:    vor.vv v12, v16, v12
-; RV64-NEXT:    vsll.vx v16, v8, a0
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vsll.vx v8, v8, a1
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vor.vv v10, v14, v10
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v10, v10, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v10, v10, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v10, v10, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 224
-; RV64-NEXT:    ret
-  %a = call <4 x i64> @llvm.clmulr.v4i64(<4 x i64> %x, <4 x i64> %y)
-  ret <4 x i64> %a
-}
-
-define <8 x i64> @clmulr_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
-; RV32-LABEL: clmulr_v8i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -352
-; RV32-NEXT:    sw ra, 348(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 344(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 340(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 336(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    lui s11, 1044480
-; RV32-NEXT:    lui s0, 524288
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    li ra, 2
-; RV32-NEXT:    li t4, 4
-; RV32-NEXT:    li t2, 8
-; RV32-NEXT:    li t6, 16
-; RV32-NEXT:    li t5, 32
-; RV32-NEXT:    li t3, 64
-; RV32-NEXT:    li t1, 128
-; RV32-NEXT:    li t0, 256
-; RV32-NEXT:    li a7, 512
-; RV32-NEXT:    li a6, 1024
-; RV32-NEXT:    lui a4, 1
-; RV32-NEXT:    lui a3, 2
-; RV32-NEXT:    lui a2, 4
-; RV32-NEXT:    lui a5, 8
-; RV32-NEXT:    lui s1, 16
-; RV32-NEXT:    lui a1, 32
-; RV32-NEXT:    lui s2, 64
-; RV32-NEXT:    lui s3, 128
-; RV32-NEXT:    lui s4, 256
-; RV32-NEXT:    lui s5, 512
-; RV32-NEXT:    lui s6, 1024
-; RV32-NEXT:    lui s7, 2048
-; RV32-NEXT:    lui s8, 4096
-; RV32-NEXT:    lui s9, 8192
-; RV32-NEXT:    lui s10, 16384
-; RV32-NEXT:    sw s11, 272(sp)
-; RV32-NEXT:    lui s11, 32768
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw s0, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 256(sp)
-; RV32-NEXT:    sw a0, 260(sp)
-; RV32-NEXT:    sw zero, 248(sp)
-; RV32-NEXT:    sw ra, 252(sp)
-; RV32-NEXT:    lui ra, 65536
-; RV32-NEXT:    sw zero, 240(sp)
-; RV32-NEXT:    sw t4, 244(sp)
-; RV32-NEXT:    lui t4, 131072
-; RV32-NEXT:    sw zero, 232(sp)
-; RV32-NEXT:    sw t2, 236(sp)
-; RV32-NEXT:    lui t2, 262144
-; RV32-NEXT:    sw zero, 224(sp)
-; RV32-NEXT:    sw t6, 228(sp)
-; RV32-NEXT:    sw zero, 216(sp)
-; RV32-NEXT:    sw t5, 220(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw t3, 212(sp)
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw t1, 204(sp)
-; RV32-NEXT:    sw zero, 192(sp)
-; RV32-NEXT:    sw t0, 196(sp)
-; RV32-NEXT:    sw zero, 184(sp)
-; RV32-NEXT:    sw a7, 188(sp)
-; RV32-NEXT:    sw zero, 176(sp)
-; RV32-NEXT:    sw a6, 180(sp)
-; RV32-NEXT:    li t1, 1024
-; RV32-NEXT:    slli t6, a0, 11
-; RV32-NEXT:    sw zero, 168(sp)
-; RV32-NEXT:    sw t6, 172(sp)
-; RV32-NEXT:    sw zero, 160(sp)
-; RV32-NEXT:    sw a4, 164(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw a3, 156(sp)
-; RV32-NEXT:    lui t3, 2
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw a2, 148(sp)
-; RV32-NEXT:    lui t5, 4
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw a5, 140(sp)
-; RV32-NEXT:    lui a4, 8
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw s1, 132(sp)
-; RV32-NEXT:    sw zero, 120(sp)
-; RV32-NEXT:    sw a1, 124(sp)
-; RV32-NEXT:    sw zero, 112(sp)
-; RV32-NEXT:    sw s2, 116(sp)
-; RV32-NEXT:    sw zero, 104(sp)
-; RV32-NEXT:    sw s3, 108(sp)
-; RV32-NEXT:    sw zero, 96(sp)
-; RV32-NEXT:    sw s4, 100(sp)
-; RV32-NEXT:    sw zero, 88(sp)
-; RV32-NEXT:    sw s5, 92(sp)
-; RV32-NEXT:    sw zero, 80(sp)
-; RV32-NEXT:    sw s6, 84(sp)
-; RV32-NEXT:    sw zero, 72(sp)
-; RV32-NEXT:    sw s7, 76(sp)
-; RV32-NEXT:    sw zero, 64(sp)
-; RV32-NEXT:    sw s8, 68(sp)
-; RV32-NEXT:    sw zero, 56(sp)
-; RV32-NEXT:    sw s9, 60(sp)
-; RV32-NEXT:    sw zero, 48(sp)
-; RV32-NEXT:    sw s10, 52(sp)
-; RV32-NEXT:    sw zero, 40(sp)
-; RV32-NEXT:    sw s11, 44(sp)
-; RV32-NEXT:    sw zero, 32(sp)
-; RV32-NEXT:    sw ra, 36(sp)
-; RV32-NEXT:    sw zero, 24(sp)
-; RV32-NEXT:    sw t4, 28(sp)
-; RV32-NEXT:    sw zero, 16(sp)
-; RV32-NEXT:    sw t2, 20(sp)
-; RV32-NEXT:    sw zero, 8(sp)
-; RV32-NEXT:    sw s0, 12(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v28, a1
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vmv.v.x v4, a1
-; RV32-NEXT:    addi a1, sp, 272
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a1), zero
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v0, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li t0, 56
-; RV32-NEXT:    vsrl.vi v20, v8, 24
-; RV32-NEXT:    vsrl.vx v12, v8, t0
-; RV32-NEXT:    li a6, 40
-; RV32-NEXT:    vsrl.vx v16, v8, a6
-; RV32-NEXT:    vsll.vx v24, v8, t0
-; RV32-NEXT:    addi a3, s1, -256
-; RV32-NEXT:    vand.vx v16, v16, a3
-; RV32-NEXT:    vor.vv v16, v16, v12
-; RV32-NEXT:    vand.vx v12, v8, a3
-; RV32-NEXT:    vsll.vx v12, v12, a6
-; RV32-NEXT:    vor.vv v12, v24, v12
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    lui a5, 4080
-; RV32-NEXT:    vand.vx v20, v20, a5
-; RV32-NEXT:    lui a7, 349525
-; RV32-NEXT:    addi a7, a7, 1365
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vor.vv v20, v24, v20
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a7
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vor.vv v16, v20, v16
-; RV32-NEXT:    vand.vx v20, v8, a5
-; RV32-NEXT:    vsll.vi v20, v20, 24
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v20, v8
-; RV32-NEXT:    addi a7, sp, 264
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    addi a7, sp, 256
-; RV32-NEXT:    vlse64.v v12, (a7), zero
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vand.vv v16, v16, v28
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v4
-; RV32-NEXT:    vand.vv v16, v16, v4
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    addi a7, sp, 248
-; RV32-NEXT:    vlse64.v v16, (a7), zero
-; RV32-NEXT:    vand.vv v28, v8, v20
-; RV32-NEXT:    addi a7, sp, 240
-; RV32-NEXT:    addi a0, sp, 232
-; RV32-NEXT:    vlse64.v v20, (a7), zero
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v4, v8, v12
-; RV32-NEXT:    vand.vv v0, v8, v16
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a2, sp, 224
-; RV32-NEXT:    addi a7, sp, 216
-; RV32-NEXT:    addi a1, sp, 208
-; RV32-NEXT:    addi a0, sp, 200
-; RV32-NEXT:    vlse64.v v12, (a2), zero
-; RV32-NEXT:    vlse64.v v16, (a7), zero
-; RV32-NEXT:    vlse64.v v20, (a1), zero
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    addi a1, sp, 184
-; RV32-NEXT:    addi a2, sp, 176
-; RV32-NEXT:    addi a7, sp, 168
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a2), zero
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    addi a1, sp, 152
-; RV32-NEXT:    addi a2, sp, 144
-; RV32-NEXT:    addi a7, sp, 136
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a2), zero
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    addi a1, sp, 120
-; RV32-NEXT:    addi a2, sp, 112
-; RV32-NEXT:    addi a7, sp, 104
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a2), zero
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    addi a1, sp, 88
-; RV32-NEXT:    addi a2, sp, 80
-; RV32-NEXT:    addi a7, sp, 72
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a2), zero
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    addi a1, sp, 56
-; RV32-NEXT:    addi a2, sp, 48
-; RV32-NEXT:    addi a7, sp, 40
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a2), zero
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    addi a7, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vlse64.v v20, (a2), zero
-; RV32-NEXT:    vlse64.v v24, (a7), zero
-; RV32-NEXT:    vand.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v20
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vv v12, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vi v12, v8, 8
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 16
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 128
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 256
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    li a0, 512
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, a4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s1
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    lui a0, 32
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s3
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s5
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s6
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s7
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s8
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s9
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s10
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, s11
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, ra
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t4
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vand.vx v12, v8, t2
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    vmul.vv v16, v8, v28
-; RV32-NEXT:    vmul.vv v20, v8, v4
-; RV32-NEXT:    vmul.vv v24, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v28, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v28, v8, v28
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v4, v8, v4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v0, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 7
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v12, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vi v8, v8, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    addi a0, sp, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vxor.vv v8, v8, v16
-; RV32-NEXT:    vxor.vv v8, v8, v20
-; RV32-NEXT:    vxor.vv v8, v8, v24
-; RV32-NEXT:    vxor.vv v8, v8, v28
-; RV32-NEXT:    vxor.vv v8, v8, v4
-; RV32-NEXT:    vxor.vv v8, v8, v0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vxor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vx v12, v8, t0
-; RV32-NEXT:    vsrl.vx v16, v8, a6
-; RV32-NEXT:    vsrl.vi v20, v8, 24
-; RV32-NEXT:    vand.vx v16, v16, a3
-; RV32-NEXT:    vor.vv v12, v16, v12
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vand.vx v20, v20, a5
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 8
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v24, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v16, v16, v20
-; RV32-NEXT:    vand.vx v20, v8, a5
-; RV32-NEXT:    vand.vv v24, v8, v24
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vsll.vi v20, v20, 24
-; RV32-NEXT:    vor.vv v20, v20, v24
-; RV32-NEXT:    vsll.vx v24, v8, t0
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vsll.vx v8, v8, a6
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vor.vv v12, v16, v12
-; RV32-NEXT:    vor.vv v8, v8, v20
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a1, a1, a0
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 288
-; RV32-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    slli a0, a0, 6
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 348(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 344(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 340(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 336(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 352
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: clmulr_v8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -240
-; RV64-NEXT:    sd ra, 232(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 224(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 216(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 208(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 200(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 168(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 160(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 144(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    li t0, 40
-; RV64-NEXT:    lui t1, 16
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v20, v8, 24
-; RV64-NEXT:    lui a7, 4080
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    li s0, 255
-; RV64-NEXT:    lui a3, 61681
-; RV64-NEXT:    lui a4, 209715
-; RV64-NEXT:    lui a5, 349525
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    li a0, 64
-; RV64-NEXT:    li s9, 1
-; RV64-NEXT:    vsrl.vx v16, v8, a6
-; RV64-NEXT:    vsrl.vx v28, v8, t0
-; RV64-NEXT:    addi s4, t1, -256
-; RV64-NEXT:    vand.vx v24, v20, a7
-; RV64-NEXT:    slli s0, s0, 24
-; RV64-NEXT:    vand.vx v4, v8, a7
-; RV64-NEXT:    vsll.vx v20, v8, a6
-; RV64-NEXT:    addi a7, a3, -241
-; RV64-NEXT:    addi a6, a4, 819
-; RV64-NEXT:    addi a5, a5, 1365
-; RV64-NEXT:    slli a3, s9, 11
-; RV64-NEXT:    sd a3, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 31
-; RV64-NEXT:    sd a3, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 32
-; RV64-NEXT:    sd a3, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 33
-; RV64-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 34
-; RV64-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 35
-; RV64-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 36
-; RV64-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 37
-; RV64-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 38
-; RV64-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 39
-; RV64-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 40
-; RV64-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a3, s9, 41
-; RV64-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli s6, s9, 42
-; RV64-NEXT:    slli s7, s9, 43
-; RV64-NEXT:    slli a3, a7, 32
-; RV64-NEXT:    add a7, a7, a3
-; RV64-NEXT:    slli a3, a6, 32
-; RV64-NEXT:    add a6, a6, a3
-; RV64-NEXT:    slli a3, a5, 32
-; RV64-NEXT:    add a5, a5, a3
-; RV64-NEXT:    slli s8, s9, 44
-; RV64-NEXT:    vand.vx v28, v28, s4
-; RV64-NEXT:    vand.vx v12, v12, s0
-; RV64-NEXT:    vsll.vi v4, v4, 24
-; RV64-NEXT:    vand.vx v0, v8, s0
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vor.vv v16, v28, v16
-; RV64-NEXT:    vor.vv v12, v12, v24
-; RV64-NEXT:    vsll.vi v24, v0, 8
-; RV64-NEXT:    vsll.vx v8, v8, t0
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vor.vv v16, v4, v24
-; RV64-NEXT:    vor.vv v8, v20, v8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v12, v12, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v12, v12, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v12, v12, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    slli s10, s9, 45
-; RV64-NEXT:    vand.vx v16, v8, a1
-; RV64-NEXT:    slli s11, s9, 46
-; RV64-NEXT:    vand.vx v20, v8, a0
-; RV64-NEXT:    slli ra, s9, 47
-; RV64-NEXT:    slli s3, s9, 48
-; RV64-NEXT:    slli s2, s9, 49
-; RV64-NEXT:    slli s1, s9, 50
-; RV64-NEXT:    slli t6, s9, 51
-; RV64-NEXT:    slli t5, s9, 52
-; RV64-NEXT:    slli t4, s9, 53
-; RV64-NEXT:    slli t3, s9, 54
-; RV64-NEXT:    slli t2, s9, 55
-; RV64-NEXT:    slli t1, s9, 56
-; RV64-NEXT:    slli t0, s9, 57
-; RV64-NEXT:    slli a4, s9, 58
-; RV64-NEXT:    slli a3, s9, 59
-; RV64-NEXT:    slli a2, s9, 60
-; RV64-NEXT:    slli a1, s9, 61
-; RV64-NEXT:    slli s9, s9, 62
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    slli a0, a0, 63
-; RV64-NEXT:    vand.vi v24, v8, 2
-; RV64-NEXT:    vand.vi v28, v8, 1
-; RV64-NEXT:    vand.vi v4, v8, 4
-; RV64-NEXT:    vand.vi v0, v8, 8
-; RV64-NEXT:    vmul.vv v24, v8, v24
-; RV64-NEXT:    sd a5, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v28
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v4
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v24, v8, v0
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v24, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v16
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vmul.vv v12, v8, v20
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 128
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 256
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 512
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 6
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    li s5, 1024
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 1
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 2
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 4
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 8
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 16
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 32
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 64
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 128
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 256
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 512
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 1024
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 2048
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 4096
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 8192
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 7
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 16384
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 32768
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 65536
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 131072
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui s5, 262144
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 5
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 2
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 1
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    mv s5, a5
-; RV64-NEXT:    slli a5, a5, 3
-; RV64-NEXT:    add a5, a5, s5
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 128
-; RV64-NEXT:    vs4r.v v12, (a5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv a5, s5
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    add s5, s5, a5
-; RV64-NEXT:    ld a5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vand.vx v12, v8, s5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s7
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s8
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s10
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 4
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s11
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 1
-; RV64-NEXT:    add s6, s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, ra
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s5, vlenb
-; RV64-NEXT:    slli s5, s5, 3
-; RV64-NEXT:    mv s6, s5
-; RV64-NEXT:    slli s5, s5, 2
-; RV64-NEXT:    add s5, s5, s6
-; RV64-NEXT:    add s5, sp, s5
-; RV64-NEXT:    addi s5, s5, 128
-; RV64-NEXT:    vs4r.v v12, (s5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s3, vlenb
-; RV64-NEXT:    slli s3, s3, 2
-; RV64-NEXT:    mv s5, s3
-; RV64-NEXT:    slli s3, s3, 3
-; RV64-NEXT:    add s3, s3, s5
-; RV64-NEXT:    add s3, sp, s3
-; RV64-NEXT:    addi s3, s3, 128
-; RV64-NEXT:    vs4r.v v12, (s3) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s2, vlenb
-; RV64-NEXT:    slli s2, s2, 5
-; RV64-NEXT:    add s2, sp, s2
-; RV64-NEXT:    addi s2, s2, 128
-; RV64-NEXT:    vs4r.v v12, (s2) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, s1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr s1, vlenb
-; RV64-NEXT:    slli s1, s1, 2
-; RV64-NEXT:    mv s2, s1
-; RV64-NEXT:    slli s1, s1, 1
-; RV64-NEXT:    add s2, s2, s1
-; RV64-NEXT:    slli s1, s1, 1
-; RV64-NEXT:    add s1, s1, s2
-; RV64-NEXT:    add s1, sp, s1
-; RV64-NEXT:    addi s1, s1, 128
-; RV64-NEXT:    vs4r.v v12, (s1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t6
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t6, vlenb
-; RV64-NEXT:    slli t6, t6, 3
-; RV64-NEXT:    mv s1, t6
-; RV64-NEXT:    slli t6, t6, 1
-; RV64-NEXT:    add t6, t6, s1
-; RV64-NEXT:    add t6, sp, t6
-; RV64-NEXT:    addi t6, t6, 128
-; RV64-NEXT:    vs4r.v v12, (t6) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t5
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t5, vlenb
-; RV64-NEXT:    slli t5, t5, 2
-; RV64-NEXT:    mv t6, t5
-; RV64-NEXT:    slli t5, t5, 2
-; RV64-NEXT:    add t5, t5, t6
-; RV64-NEXT:    add t5, sp, t5
-; RV64-NEXT:    addi t5, t5, 128
-; RV64-NEXT:    vs4r.v v12, (t5) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t4
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t4, vlenb
-; RV64-NEXT:    slli t4, t4, 4
-; RV64-NEXT:    add t4, sp, t4
-; RV64-NEXT:    addi t4, t4, 128
-; RV64-NEXT:    vs4r.v v12, (t4) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t3
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t3, vlenb
-; RV64-NEXT:    slli t3, t3, 2
-; RV64-NEXT:    mv t4, t3
-; RV64-NEXT:    slli t3, t3, 1
-; RV64-NEXT:    add t3, t3, t4
-; RV64-NEXT:    add t3, sp, t3
-; RV64-NEXT:    addi t3, t3, 128
-; RV64-NEXT:    vs4r.v v12, (t3) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t2
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t2, vlenb
-; RV64-NEXT:    slli t2, t2, 3
-; RV64-NEXT:    add t2, sp, t2
-; RV64-NEXT:    addi t2, t2, 128
-; RV64-NEXT:    vs4r.v v12, (t2) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t1
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    csrr t1, vlenb
-; RV64-NEXT:    slli t1, t1, 2
-; RV64-NEXT:    add t1, sp, t1
-; RV64-NEXT:    addi t1, t1, 128
-; RV64-NEXT:    vs4r.v v12, (t1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, t0
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    addi t0, sp, 128
-; RV64-NEXT:    vs4r.v v12, (t0) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    vand.vx v12, v8, a4
-; RV64-NEXT:    vmul.vv v28, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, a3
-; RV64-NEXT:    vmul.vv v24, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, a2
-; RV64-NEXT:    vmul.vv v20, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, a1
-; RV64-NEXT:    vmul.vv v16, v8, v12
-; RV64-NEXT:    vand.vx v12, v8, s9
-; RV64-NEXT:    vmul.vv v12, v8, v12
-; RV64-NEXT:    vand.vx v0, v8, a0
-; RV64-NEXT:    vmul.vv v8, v8, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v4, v0
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 7
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v0, v0, v4
-; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
-; RV64-NEXT:    vxor.vv v4, v0, v4
-; RV64-NEXT:    vxor.vv v28, v4, v28
-; RV64-NEXT:    vxor.vv v24, v28, v24
-; RV64-NEXT:    vxor.vv v20, v24, v20
-; RV64-NEXT:    vxor.vv v16, v20, v16
-; RV64-NEXT:    vxor.vv v12, v16, v12
-; RV64-NEXT:    vxor.vv v8, v12, v8
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v12, v8, a0
-; RV64-NEXT:    li a1, 40
-; RV64-NEXT:    vsrl.vx v16, v8, a1
-; RV64-NEXT:    vsrl.vi v20, v8, 24
-; RV64-NEXT:    vsrl.vi v24, v8, 8
-; RV64-NEXT:    vand.vx v16, v16, s4
-; RV64-NEXT:    vor.vv v12, v16, v12
-; RV64-NEXT:    vand.vx v16, v8, s0
-; RV64-NEXT:    lui a2, 4080
-; RV64-NEXT:    vand.vx v20, v20, a2
-; RV64-NEXT:    vand.vx v24, v24, s0
-; RV64-NEXT:    vor.vv v20, v24, v20
-; RV64-NEXT:    vand.vx v24, v8, a2
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v24, v24, 24
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    vsll.vx v24, v8, a0
-; RV64-NEXT:    vand.vx v8, v8, s4
-; RV64-NEXT:    vsll.vx v8, v8, a1
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    vor.vv v12, v20, v12
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vand.vx v8, v8, a7
-; RV64-NEXT:    vand.vx v12, v12, a7
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a6
-; RV64-NEXT:    vand.vx v12, v12, a6
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vand.vx v12, v12, a5
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a1, a1, a0
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 232(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 224(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 216(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 208(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 200(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 192(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 168(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 160(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 152(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 144(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 240
-; RV64-NEXT:    ret
-  %a = call <8 x i64> @llvm.clmulr.v8i64(<8 x i64> %x, <8 x i64> %y)
-  ret <8 x i64> %a
-}

>From 22cdaedfca60f09841b34c93ddd26fc19118b546 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 20 Nov 2025 10:28:07 +0000
Subject: [PATCH 4/5] [ISel] DAGCombine clmul -> clmul[hr]

---
 llvm/include/llvm/CodeGen/SDPatternMatch.h    |    5 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   26 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   25 +-
 llvm/test/CodeGen/RISCV/clmul.ll              | 8265 +++++++++++++++++
 4 files changed, 8314 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index a81b91e338cb8..445e025861fd2 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -919,6 +919,11 @@ inline BinaryOpc_match<LHS, RHS> m_Rotr(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::ROTR, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS> m_Clmul(const LHS &L, const RHS &R) {
+  return BinaryOpc_match<LHS, RHS>(ISD::CLMUL, L, R);
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS, true> m_FAdd(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS, true>(ISD::FADD, L, R);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 94afdc5db6613..906f4b2a8ec39 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10315,6 +10315,25 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
   if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
     return R;
 
+  // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y).
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue X, Y;
+  if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))),
+                        m_SpecificInt(VT.getScalarSizeInBits() / 2 - 1))))
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+                       DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y));
+  if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))),
+                        m_SpecificInt(VT.getScalarSizeInBits() / 2))))
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+                       DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y));
+
+  // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> clmulh(x, y).
+  if (sd_match(N, m_Srl(m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)),
+                                             m_BitReverse(m_Value(Y)))),
+                        m_SpecificInt(1))))
+    return DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
+
   // We want to pull some binops through shifts, so that we have (and (shift))
   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   // thing happens with address calculations, so it's important to canonicalize
@@ -10350,8 +10369,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
     return SDValue();
 
   // Attempt to fold the constants, shifting the binop RHS by the shift amount.
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
   if (SDValue NewRHS = DAG.FoldConstantArithmetic(
           N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
     SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
@@ -11771,6 +11788,11 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
       sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
     return DAG.getNode(ISD::SRL, DL, VT, X, Y);
 
+  // fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y)
+  if (sd_match(N, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)),
+                                       m_BitReverse(m_Value(Y))))))
+    return DAG.getNode(ISD::CLMULR, DL, VT, X, Y);
+
   return SDValue();
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 79627466bad0d..0014bcf60c0e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8308,13 +8308,14 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
   SDValue X = Node->getOperand(0);
   SDValue Y = Node->getOperand(1);
   unsigned BW = VT.getScalarSizeInBits();
+  unsigned Opcode = Node->getOpcode();
 
-  if (VT.isVector() && isOperationLegalOrCustomOrPromote(
-                           Node->getOpcode(), VT.getVectorElementType()))
+  if (VT.isVector() &&
+      isOperationLegalOrCustomOrPromote(Opcode, VT.getVectorElementType()))
     return DAG.UnrollVectorOp(Node);
 
   SDValue Res = DAG.getConstant(0, DL, VT);
-  switch (Node->getOpcode()) {
+  switch (Opcode) {
   case ISD::CLMUL: {
     for (unsigned I = 0; I < BW; ++I) {
       SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
@@ -8327,12 +8328,26 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
   case ISD::CLMULR:
   case ISD::CLMULH: {
     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW);
+    // For example, ExtVT = i64 based operations aren't legal on rv32; use
+    // bitreverse-based lowering in this case.
+    if (!isOperationLegalOrCustom(ISD::ZERO_EXTEND, ExtVT) ||
+        !isOperationLegalOrCustom(ISD::SRL, ExtVT)) {
+      SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
+      SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
+      SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
+      Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
+      Res = Opcode == ISD::CLMULR
+                ? Res
+                : DAG.getNode(ISD::SRL, DL, VT, Res,
+                              DAG.getShiftAmountConstant(1, VT, DL));
+      break;
+    }
     SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
     SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
     SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
-    unsigned ShtAmt = Node->getOpcode() == ISD::CLMULR ? BW - 1 : BW;
+    unsigned ShtAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
     SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
-                                 DAG.getShiftAmountConstant(ShtAmt, VT, DL));
+                                 DAG.getShiftAmountConstant(ShtAmt, ExtVT, DL));
     Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
     break;
   }
diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
index da4f4d3075133..429d34a0f9851 100644
--- a/llvm/test/CodeGen/RISCV/clmul.ll
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -3241,3 +3241,8268 @@ define i16 @clmul_constfold_i16() nounwind {
   %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1)
   ret i16 %res
 }
+
+define i4 @clmulr_i4(i4 %a, i4 %b) nounwind {
+; RV32IM-LABEL: clmulr_i4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui t2, 61681
+; RV32IM-NEXT:    lui t3, 209715
+; RV32IM-NEXT:    lui a7, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t5, a1, 24
+; RV32IM-NEXT:    slli a4, a1, 24
+; RV32IM-NEXT:    li t6, 1
+; RV32IM-NEXT:    lui s0, 4
+; RV32IM-NEXT:    lui s1, 8
+; RV32IM-NEXT:    lui s2, 32
+; RV32IM-NEXT:    lui s3, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    lui s11, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi s4, a3, -256
+; RV32IM-NEXT:    lui a5, 16
+; RV32IM-NEXT:    and t0, t0, s4
+; RV32IM-NEXT:    or a3, t0, t1
+; RV32IM-NEXT:    lui t0, 32768
+; RV32IM-NEXT:    and t1, t4, s4
+; RV32IM-NEXT:    or t4, t1, t5
+; RV32IM-NEXT:    lui a6, 65536
+; RV32IM-NEXT:    and a0, a0, s4
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or t5, a2, a0
+; RV32IM-NEXT:    lui a2, 131072
+; RV32IM-NEXT:    and a1, a1, s4
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a4, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    addi t2, t2, -241
+; RV32IM-NEXT:    addi t3, t3, 819
+; RV32IM-NEXT:    addi a7, a7, 1365
+; RV32IM-NEXT:    or a3, t5, a3
+; RV32IM-NEXT:    or a0, a0, t4
+; RV32IM-NEXT:    srli t4, a3, 4
+; RV32IM-NEXT:    and a3, a3, t2
+; RV32IM-NEXT:    srli t5, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and t4, t4, t2
+; RV32IM-NEXT:    slli a3, a3, 4
+; RV32IM-NEXT:    and t5, t5, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 2
+; RV32IM-NEXT:    and a3, a3, t3
+; RV32IM-NEXT:    srli t5, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a3, a3, 2
+; RV32IM-NEXT:    and t5, t5, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 1
+; RV32IM-NEXT:    and a3, a3, a7
+; RV32IM-NEXT:    srli t5, a0, 1
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and t4, t4, a7
+; RV32IM-NEXT:    and a7, t5, a7
+; RV32IM-NEXT:    lui a4, 524288
+; RV32IM-NEXT:    slli t6, t6, 11
+; RV32IM-NEXT:    slli a3, a3, 1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, a7, a0
+; RV32IM-NEXT:    andi t5, a0, 2
+; RV32IM-NEXT:    andi t4, a0, 1
+; RV32IM-NEXT:    and t6, a0, t6
+; RV32IM-NEXT:    lui a7, 1
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a7, 2
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    sw s0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a0, s2
+; RV32IM-NEXT:    and a5, a0, s3
+; RV32IM-NEXT:    sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s5
+; RV32IM-NEXT:    sw a5, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s6
+; RV32IM-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s7, a0, s7
+; RV32IM-NEXT:    and s8, a0, s8
+; RV32IM-NEXT:    and a5, a0, s9
+; RV32IM-NEXT:    sw a5, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s10
+; RV32IM-NEXT:    sw a5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s11
+; RV32IM-NEXT:    sw a5, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, ra
+; RV32IM-NEXT:    sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, t0
+; RV32IM-NEXT:    sw a5, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a6
+; RV32IM-NEXT:    sw a5, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, a4
+; RV32IM-NEXT:    sw a4, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 4
+; RV32IM-NEXT:    andi a2, a0, 8
+; RV32IM-NEXT:    andi a4, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a6, a0, 64
+; RV32IM-NEXT:    andi a7, a0, 128
+; RV32IM-NEXT:    andi t0, a0, 256
+; RV32IM-NEXT:    andi t1, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul t5, a3, t5
+; RV32IM-NEXT:    sw t5, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, a3, t4
+; RV32IM-NEXT:    mul a1, a3, a1
+; RV32IM-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s6, a3, a2
+; RV32IM-NEXT:    mul s5, a3, a4
+; RV32IM-NEXT:    mul s3, a3, a5
+; RV32IM-NEXT:    mul a1, a3, a6
+; RV32IM-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a3, a7
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s0, a3, t0
+; RV32IM-NEXT:    mul t5, a3, t1
+; RV32IM-NEXT:    mul s11, a3, a0
+; RV32IM-NEXT:    mul a0, a3, t6
+; RV32IM-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a3, a0
+; RV32IM-NEXT:    mul s1, a3, s1
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, a3, a0
+; RV32IM-NEXT:    mul a0, a3, s2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a3, a0
+; RV32IM-NEXT:    mul a6, a3, s7
+; RV32IM-NEXT:    mul t4, a3, s8
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a5
+; RV32IM-NEXT:    lw t0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, t0
+; RV32IM-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, t6
+; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, s2
+; RV32IM-NEXT:    lw s8, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a3, s8
+; RV32IM-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, s9
+; RV32IM-NEXT:    lw s9, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s10, s10, s9
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor s3, s5, s3
+; RV32IM-NEXT:    xor t5, s0, t5
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s10, s6
+; RV32IM-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, s3, a4
+; RV32IM-NEXT:    xor t1, t5, s11
+; RV32IM-NEXT:    xor a7, a7, s1
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a4
+; RV32IM-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, t1, a4
+; RV32IM-NEXT:    xor a5, a7, ra
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a6
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s7
+; RV32IM-NEXT:    xor a0, a0, t6
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s2
+; RV32IM-NEXT:    xor a4, a1, a4
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a0, a0, s8
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a0, a3
+; RV32IM-NEXT:    and a3, a2, s4
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s4
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and a1, a1, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and a1, a1, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    andi a1, a0, 5
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    andi a0, a0, 5
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -480
+; RV64IM-NEXT:    sd ra, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a4, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s8, 16
+; RV64IM-NEXT:    srli t1, a0, 56
+; RV64IM-NEXT:    srliw t3, a0, 24
+; RV64IM-NEXT:    slli t4, a0, 56
+; RV64IM-NEXT:    lui s3, 61681
+; RV64IM-NEXT:    lui t5, 209715
+; RV64IM-NEXT:    lui s6, 349525
+; RV64IM-NEXT:    srli s9, a1, 24
+; RV64IM-NEXT:    srli s0, a1, 8
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    srli t2, a1, 56
+; RV64IM-NEXT:    srliw s11, a1, 24
+; RV64IM-NEXT:    slli a3, a1, 56
+; RV64IM-NEXT:    li t0, 1
+; RV64IM-NEXT:    lui s1, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui t6, 4096
+; RV64IM-NEXT:    lui s5, 8192
+; RV64IM-NEXT:    lui s7, 4080
+; RV64IM-NEXT:    and a2, a4, s7
+; RV64IM-NEXT:    slli ra, s4, 24
+; RV64IM-NEXT:    addi s10, s8, -256
+; RV64IM-NEXT:    and a4, a6, ra
+; RV64IM-NEXT:    sd ra, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a4, a2
+; RV64IM-NEXT:    and a4, a0, s7
+; RV64IM-NEXT:    slli t3, t3, 32
+; RV64IM-NEXT:    addi s3, s3, -241
+; RV64IM-NEXT:    addi s4, t5, 819
+; RV64IM-NEXT:    addi s6, s6, 1365
+; RV64IM-NEXT:    and a6, s9, s7
+; RV64IM-NEXT:    and a5, a5, s10
+; RV64IM-NEXT:    or a5, a5, t1
+; RV64IM-NEXT:    and t1, a1, s7
+; RV64IM-NEXT:    slli t5, s11, 32
+; RV64IM-NEXT:    slli a4, a4, 24
+; RV64IM-NEXT:    or s9, a4, t3
+; RV64IM-NEXT:    slli a4, s3, 32
+; RV64IM-NEXT:    add s3, s3, a4
+; RV64IM-NEXT:    slli a4, s4, 32
+; RV64IM-NEXT:    add s4, s4, a4
+; RV64IM-NEXT:    slli a4, s6, 32
+; RV64IM-NEXT:    add s6, s6, a4
+; RV64IM-NEXT:    slli t3, t0, 11
+; RV64IM-NEXT:    and a4, s0, ra
+; RV64IM-NEXT:    or a4, a4, a6
+; RV64IM-NEXT:    slli s11, t0, 32
+; RV64IM-NEXT:    and a6, a7, s10
+; RV64IM-NEXT:    or a6, a6, t2
+; RV64IM-NEXT:    slli ra, t0, 33
+; RV64IM-NEXT:    slli t1, t1, 24
+; RV64IM-NEXT:    or a7, t1, t5
+; RV64IM-NEXT:    slli s0, t0, 34
+; RV64IM-NEXT:    or a2, a2, a5
+; RV64IM-NEXT:    slli a5, t0, 35
+; RV64IM-NEXT:    sd a5, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s10
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, t4, a0
+; RV64IM-NEXT:    slli a5, t0, 36
+; RV64IM-NEXT:    sd a5, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a4, a4, a6
+; RV64IM-NEXT:    slli a6, t0, 37
+; RV64IM-NEXT:    and a1, a1, s10
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    slli a3, t0, 38
+; RV64IM-NEXT:    sd a3, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, a7
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s3
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s3
+; RV64IM-NEXT:    and a2, a2, s3
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s3
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s6, a3, a1
+; RV64IM-NEXT:    andi a1, s6, 2
+; RV64IM-NEXT:    andi a2, s6, 1
+; RV64IM-NEXT:    andi a3, s6, 4
+; RV64IM-NEXT:    andi a4, s6, 8
+; RV64IM-NEXT:    andi a5, s6, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s6, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, t0, 39
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 40
+; RV64IM-NEXT:    and a2, s6, s1
+; RV64IM-NEXT:    and a3, s6, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 41
+; RV64IM-NEXT:    and a3, s6, t6
+; RV64IM-NEXT:    and a4, s6, s5
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t0, 48
+; RV64IM-NEXT:    and a4, s6, s11
+; RV64IM-NEXT:    and a5, s6, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t0, 49
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 56
+; RV64IM-NEXT:    and a2, s6, a3
+; RV64IM-NEXT:    and a3, s6, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 57
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 42
+; RV64IM-NEXT:    slli ra, t0, 43
+; RV64IM-NEXT:    slli a4, t0, 44
+; RV64IM-NEXT:    slli t6, t0, 45
+; RV64IM-NEXT:    slli s1, t0, 46
+; RV64IM-NEXT:    slli s2, t0, 47
+; RV64IM-NEXT:    slli s3, t0, 50
+; RV64IM-NEXT:    slli s4, t0, 51
+; RV64IM-NEXT:    slli s5, t0, 52
+; RV64IM-NEXT:    slli a1, t0, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 59
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 60
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 61
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t0, t0, 62
+; RV64IM-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s6, s9
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s6, s11
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 2048
+; RV64IM-NEXT:    and a1, s6, s7
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t1, 16384
+; RV64IM-NEXT:    and a1, s6, t1
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 32768
+; RV64IM-NEXT:    and t2, s6, t2
+; RV64IM-NEXT:    lui t3, 65536
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 131072
+; RV64IM-NEXT:    and a5, s6, t4
+; RV64IM-NEXT:    lui t5, 262144
+; RV64IM-NEXT:    and t0, s6, t5
+; RV64IM-NEXT:    and s11, s6, s0
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a6
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a7
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a2
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s6, ra
+; RV64IM-NEXT:    and a1, s6, a4
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t6
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s3
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s6, s4
+; RV64IM-NEXT:    and s1, s6, s5
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s6, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s6, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s6, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, s6, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s6, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s6, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s6, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s6, a1
+; RV64IM-NEXT:    andi a1, s6, 64
+; RV64IM-NEXT:    andi a2, s6, 128
+; RV64IM-NEXT:    andi a3, s6, 1024
+; RV64IM-NEXT:    srliw a4, s6, 31
+; RV64IM-NEXT:    srli s6, s6, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t3, a0, a3
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    mul t5, a0, t2
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a3, a0, s11
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    mul a5, a0, ra
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    sd s4, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s6, s6, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s6
+; RV64IM-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s6, s4
+; RV64IM-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, s4, t4
+; RV64IM-NEXT:    ld s4, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, s4, t3
+; RV64IM-NEXT:    ld s4, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s4, t1
+; RV64IM-NEXT:    ld s4, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, s4, a7
+; RV64IM-NEXT:    ld s4, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, s4, a6
+; RV64IM-NEXT:    ld s4, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s4, a3
+; RV64IM-NEXT:    ld s4, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, s4, a2
+; RV64IM-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, s4, a1
+; RV64IM-NEXT:    ld s4, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, s4, s5
+; RV64IM-NEXT:    xor t4, s6, t4
+; RV64IM-NEXT:    ld s4, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, s4
+; RV64IM-NEXT:    ld s4, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, s4
+; RV64IM-NEXT:    xor a7, a7, t6
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a3, a3, t0
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a5, s5, s7
+; RV64IM-NEXT:    ld t0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t4, t0
+; RV64IM-NEXT:    ld t4, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, t4
+; RV64IM-NEXT:    ld t4, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, t4
+; RV64IM-NEXT:    ld t4, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    ld t4, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t4
+; RV64IM-NEXT:    ld t4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t4
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a5, a5, s8
+; RV64IM-NEXT:    ld t2, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t2
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a5, a5, s9
+; RV64IM-NEXT:    xor t2, t0, t3
+; RV64IM-NEXT:    xor t1, t2, t1
+; RV64IM-NEXT:    ld t2, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t2
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    xor a2, a2, ra
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a5, a5, s10
+; RV64IM-NEXT:    xor a7, t1, a7
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    ld a6, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a6
+; RV64IM-NEXT:    ld a6, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a6
+; RV64IM-NEXT:    slli t0, t0, 56
+; RV64IM-NEXT:    ld a6, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a5, a0
+; RV64IM-NEXT:    ld t1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a5, a7, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    slli a5, a5, 40
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    or a4, t0, a5
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a5, a3, a7
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    srli a3, a3, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a6, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, a3, a6
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a6, a1, 24
+; RV64IM-NEXT:    and a2, a2, a7
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a6, a6, 32
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a1, a7, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a5, a6
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    andi a1, a0, 5
+; RV64IM-NEXT:    srli a0, a0, 1
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    andi a0, a0, 5
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    ld ra, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 480
+; RV64IM-NEXT:    ret
+  %a.ext = zext i4 %a to i8
+  %b.ext = zext i4 %b to i8
+  %clmul = call i8 @llvm.clmul.i8(i8 %a.ext, i8 %b.ext)
+  %res.ext = lshr i8 %clmul, 3
+  %res = trunc i8 %res.ext to i4
+  ret i4 %res
+}
+
+define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind {
+; RV32IM-LABEL: clmulr_i4_bitreverse:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui t3, 61681
+; RV32IM-NEXT:    lui t5, 209715
+; RV32IM-NEXT:    lui t6, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli a4, a1, 24
+; RV32IM-NEXT:    slli a5, a1, 24
+; RV32IM-NEXT:    li s7, 1
+; RV32IM-NEXT:    lui t2, 4
+; RV32IM-NEXT:    lui s0, 8
+; RV32IM-NEXT:    lui s1, 32
+; RV32IM-NEXT:    lui s2, 64
+; RV32IM-NEXT:    lui s3, 128
+; RV32IM-NEXT:    lui s4, 256
+; RV32IM-NEXT:    lui s8, 512
+; RV32IM-NEXT:    lui a7, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    lui s11, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi s5, a3, -256
+; RV32IM-NEXT:    and t0, t0, s5
+; RV32IM-NEXT:    or t1, t0, t1
+; RV32IM-NEXT:    lui a6, 32768
+; RV32IM-NEXT:    and t4, t4, s5
+; RV32IM-NEXT:    or a4, t4, a4
+; RV32IM-NEXT:    lui t0, 65536
+; RV32IM-NEXT:    and a0, a0, s5
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    lui a2, 131072
+; RV32IM-NEXT:    and a1, a1, s5
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or t4, a5, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    or a0, a0, t1
+; RV32IM-NEXT:    lui a5, 524288
+; RV32IM-NEXT:    addi t3, t3, -241
+; RV32IM-NEXT:    addi t5, t5, 819
+; RV32IM-NEXT:    addi t6, t6, 1365
+; RV32IM-NEXT:    slli s7, s7, 11
+; RV32IM-NEXT:    or a4, t4, a4
+; RV32IM-NEXT:    srli t4, a0, 4
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, t4, a0
+; RV32IM-NEXT:    srli t4, a4, 4
+; RV32IM-NEXT:    and a4, a4, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a4, a4, 4
+; RV32IM-NEXT:    or a4, t4, a4
+; RV32IM-NEXT:    srli t4, a0, 2
+; RV32IM-NEXT:    and a0, a0, t5
+; RV32IM-NEXT:    and t4, t4, t5
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, t4, a0
+; RV32IM-NEXT:    srli t4, a4, 2
+; RV32IM-NEXT:    and a4, a4, t5
+; RV32IM-NEXT:    and t4, t4, t5
+; RV32IM-NEXT:    slli a4, a4, 2
+; RV32IM-NEXT:    or t4, t4, a4
+; RV32IM-NEXT:    srli a4, a0, 1
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    and a4, a4, t6
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a4, a4, a0
+; RV32IM-NEXT:    srli a0, t4, 1
+; RV32IM-NEXT:    and t4, t4, t6
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    slli t4, t4, 1
+; RV32IM-NEXT:    or a0, a0, t4
+; RV32IM-NEXT:    andi t4, a0, 2
+; RV32IM-NEXT:    and s6, a0, s7
+; RV32IM-NEXT:    lui t1, 1
+; RV32IM-NEXT:    and t1, a0, t1
+; RV32IM-NEXT:    sw t1, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui t1, 2
+; RV32IM-NEXT:    and t1, a0, t1
+; RV32IM-NEXT:    sw t1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t1, a0, t2
+; RV32IM-NEXT:    sw t1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    and a3, a0, a3
+; RV32IM-NEXT:    sw a3, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, s2
+; RV32IM-NEXT:    sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s3, a0, s3
+; RV32IM-NEXT:    and a3, a0, s4
+; RV32IM-NEXT:    sw a3, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, s8
+; RV32IM-NEXT:    sw a3, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a7
+; RV32IM-NEXT:    sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s9, a0, s9
+; RV32IM-NEXT:    and a3, a0, s10
+; RV32IM-NEXT:    sw a3, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, s11
+; RV32IM-NEXT:    sw a3, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, ra
+; RV32IM-NEXT:    sw a3, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a6
+; RV32IM-NEXT:    sw a3, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, t0
+; RV32IM-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 1
+; RV32IM-NEXT:    andi a2, a0, 4
+; RV32IM-NEXT:    andi a3, a0, 8
+; RV32IM-NEXT:    andi a5, a0, 16
+; RV32IM-NEXT:    andi a6, a0, 32
+; RV32IM-NEXT:    andi a7, a0, 64
+; RV32IM-NEXT:    andi t0, a0, 128
+; RV32IM-NEXT:    andi t1, a0, 256
+; RV32IM-NEXT:    andi t2, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul t4, a4, t4
+; RV32IM-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul ra, a4, a1
+; RV32IM-NEXT:    mul s11, a4, a2
+; RV32IM-NEXT:    mul s8, a4, a3
+; RV32IM-NEXT:    mul s7, a4, a5
+; RV32IM-NEXT:    mul s4, a4, a6
+; RV32IM-NEXT:    mul a1, a4, a7
+; RV32IM-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a4, t0
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s2, a4, t1
+; RV32IM-NEXT:    mul t2, a4, t2
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a4, s6
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a4, a0
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a4, a0
+; RV32IM-NEXT:    mul s1, a4, s0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a3, a4, s3
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a4, a0
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a6, a4, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a4, a0
+; RV32IM-NEXT:    mul s6, a4, s9
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a4, a0
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    lw a5, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a4, a5
+; RV32IM-NEXT:    lw t0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a4, t0
+; RV32IM-NEXT:    lw s0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a4, s0
+; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a4, s3
+; RV32IM-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s9, a4, s9
+; RV32IM-NEXT:    lw s10, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a4, s10
+; RV32IM-NEXT:    lw s10, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor ra, ra, s10
+; RV32IM-NEXT:    xor s8, s11, s8
+; RV32IM-NEXT:    xor s4, s7, s4
+; RV32IM-NEXT:    xor t2, s2, t2
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, ra, s8
+; RV32IM-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, s4, a3
+; RV32IM-NEXT:    lw t1, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t2, t1
+; RV32IM-NEXT:    xor a7, a7, s1
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, t1, a3
+; RV32IM-NEXT:    lw a5, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a7, a5
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a6
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s6
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s3
+; RV32IM-NEXT:    xor a3, a1, a3
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    xor a0, a0, s9
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    and a3, a2, s5
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s5
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and a1, a1, t3
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, t5
+; RV32IM-NEXT:    and a1, a1, t5
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    and a1, a1, t6
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i4_bitreverse:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -496
+; RV64IM-NEXT:    sd ra, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a5, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a4, a0, 40
+; RV64IM-NEXT:    lui s10, 16
+; RV64IM-NEXT:    srli a7, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli t3, a0, 56
+; RV64IM-NEXT:    lui t4, 61681
+; RV64IM-NEXT:    lui s6, 209715
+; RV64IM-NEXT:    lui s5, 349525
+; RV64IM-NEXT:    srli s3, a1, 24
+; RV64IM-NEXT:    srli t6, a1, 8
+; RV64IM-NEXT:    srli ra, a1, 40
+; RV64IM-NEXT:    srli t0, a1, 56
+; RV64IM-NEXT:    srliw s7, a1, 24
+; RV64IM-NEXT:    slli a3, a1, 56
+; RV64IM-NEXT:    li t1, 1
+; RV64IM-NEXT:    lui s1, 256
+; RV64IM-NEXT:    lui s2, 4096
+; RV64IM-NEXT:    lui s0, 8192
+; RV64IM-NEXT:    lui s9, 4080
+; RV64IM-NEXT:    and a2, a5, s9
+; RV64IM-NEXT:    slli t5, s4, 24
+; RV64IM-NEXT:    addi s11, s10, -256
+; RV64IM-NEXT:    and a5, a6, t5
+; RV64IM-NEXT:    sd t5, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a5, a2
+; RV64IM-NEXT:    and a5, a0, s9
+; RV64IM-NEXT:    slli t2, t2, 32
+; RV64IM-NEXT:    addi s4, t4, -241
+; RV64IM-NEXT:    addi s6, s6, 819
+; RV64IM-NEXT:    addi s8, s5, 1365
+; RV64IM-NEXT:    and a6, s3, s9
+; RV64IM-NEXT:    and a4, a4, s11
+; RV64IM-NEXT:    or a4, a4, a7
+; RV64IM-NEXT:    and a7, a1, s9
+; RV64IM-NEXT:    slli t4, s7, 32
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    or s5, a5, t2
+; RV64IM-NEXT:    slli a5, s4, 32
+; RV64IM-NEXT:    add s4, s4, a5
+; RV64IM-NEXT:    slli a5, s6, 32
+; RV64IM-NEXT:    add s6, s6, a5
+; RV64IM-NEXT:    slli a5, s8, 32
+; RV64IM-NEXT:    add s8, s8, a5
+; RV64IM-NEXT:    slli s3, t1, 11
+; RV64IM-NEXT:    and a5, t6, t5
+; RV64IM-NEXT:    or a5, a5, a6
+; RV64IM-NEXT:    slli t2, t1, 32
+; RV64IM-NEXT:    and a6, ra, s11
+; RV64IM-NEXT:    or a6, a6, t0
+; RV64IM-NEXT:    slli ra, t1, 33
+; RV64IM-NEXT:    slli a7, a7, 24
+; RV64IM-NEXT:    or a7, a7, t4
+; RV64IM-NEXT:    slli s7, t1, 34
+; RV64IM-NEXT:    or a2, a2, a4
+; RV64IM-NEXT:    slli a4, t1, 35
+; RV64IM-NEXT:    sd a4, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s11
+; RV64IM-NEXT:    sd s11, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, t3, a0
+; RV64IM-NEXT:    slli a4, t1, 36
+; RV64IM-NEXT:    sd a4, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a4, a5, a6
+; RV64IM-NEXT:    slli a6, t1, 37
+; RV64IM-NEXT:    and a1, a1, s11
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    or a0, a0, s5
+; RV64IM-NEXT:    or a1, a1, a7
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s4, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s6, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    sd s8, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s8
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s8
+; RV64IM-NEXT:    and a2, a2, s8
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s8
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s5, a3, a1
+; RV64IM-NEXT:    andi a1, s5, 2
+; RV64IM-NEXT:    andi a2, s5, 1
+; RV64IM-NEXT:    andi a3, s5, 4
+; RV64IM-NEXT:    andi a4, s5, 8
+; RV64IM-NEXT:    andi a5, s5, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s5, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, t1, 38
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 40
+; RV64IM-NEXT:    lui a2, 128
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    and a3, s5, s1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 41
+; RV64IM-NEXT:    and a3, s5, s2
+; RV64IM-NEXT:    and a4, s5, s0
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t1, 48
+; RV64IM-NEXT:    and a4, s5, t2
+; RV64IM-NEXT:    and a5, s5, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t1, 49
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 56
+; RV64IM-NEXT:    and a2, s5, a3
+; RV64IM-NEXT:    and a3, s5, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 57
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 39
+; RV64IM-NEXT:    slli ra, t1, 42
+; RV64IM-NEXT:    slli a4, t1, 43
+; RV64IM-NEXT:    slli a5, t1, 44
+; RV64IM-NEXT:    slli s0, t1, 45
+; RV64IM-NEXT:    slli s1, t1, 46
+; RV64IM-NEXT:    slli s2, t1, 47
+; RV64IM-NEXT:    slli s6, t1, 50
+; RV64IM-NEXT:    slli a1, t1, 51
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 52
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 59
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 60
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 61
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, t1, 62
+; RV64IM-NEXT:    sd t1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, s5, s3
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s10
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 32
+; RV64IM-NEXT:    and a1, s5, s8
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s5, s11
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s5, s10
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    and a1, s5, s9
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t0, 2048
+; RV64IM-NEXT:    and a1, s5, t0
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 16384
+; RV64IM-NEXT:    and a1, s5, t2
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t3, 32768
+; RV64IM-NEXT:    and t3, s5, t3
+; RV64IM-NEXT:    lui t4, 65536
+; RV64IM-NEXT:    and a1, s5, t4
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t5, 131072
+; RV64IM-NEXT:    and a7, s5, t5
+; RV64IM-NEXT:    lui t6, 262144
+; RV64IM-NEXT:    and t6, s5, t6
+; RV64IM-NEXT:    and s11, s5, s7
+; RV64IM-NEXT:    ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a6
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s4
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a2
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s5, ra
+; RV64IM-NEXT:    and a1, s5, a4
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a5
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s0
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s6
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s0, s5, a1
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, s5, a1
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s5, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s5, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s5, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, s5, a1
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s5, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s5, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s5, a1
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s5, a1
+; RV64IM-NEXT:    andi a1, s5, 64
+; RV64IM-NEXT:    andi a2, s5, 128
+; RV64IM-NEXT:    andi a3, s5, 1024
+; RV64IM-NEXT:    srliw a4, s5, 31
+; RV64IM-NEXT:    srli s5, s5, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t2, a0, a3
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, a0, a1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a7
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a2, a0, s11
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul ra, a0, ra
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, a0, a1
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s5, s5, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s5
+; RV64IM-NEXT:    ld s5, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, s5, s4
+; RV64IM-NEXT:    ld s4, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, s4, t4
+; RV64IM-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s4, t2
+; RV64IM-NEXT:    ld s4, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, s4, t0
+; RV64IM-NEXT:    ld s4, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, s4, a6
+; RV64IM-NEXT:    ld s4, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s4, a5
+; RV64IM-NEXT:    ld s4, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, s4, a2
+; RV64IM-NEXT:    ld s4, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor ra, s4, ra
+; RV64IM-NEXT:    ld s4, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, s4, a1
+; RV64IM-NEXT:    ld s4, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s4, s6
+; RV64IM-NEXT:    xor t4, s5, t4
+; RV64IM-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, s4
+; RV64IM-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t0, s4
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a5, a5, t3
+; RV64IM-NEXT:    xor a2, a2, a7
+; RV64IM-NEXT:    xor a3, ra, a3
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a7, s6, s7
+; RV64IM-NEXT:    ld t3, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t4, t3
+; RV64IM-NEXT:    ld t4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, t4
+; RV64IM-NEXT:    ld t4, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t0, t4
+; RV64IM-NEXT:    ld t4, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t4
+; RV64IM-NEXT:    ld t4, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, t4
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    ld t1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t0, t1
+; RV64IM-NEXT:    ld t1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, t1
+; RV64IM-NEXT:    ld t1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    xor a3, a3, t6
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor t1, t3, t2
+; RV64IM-NEXT:    xor t0, t1, t0
+; RV64IM-NEXT:    ld t1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, t1
+; RV64IM-NEXT:    ld t1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t1
+; RV64IM-NEXT:    ld t1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a7, a7, s10
+; RV64IM-NEXT:    xor a6, t0, a6
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    ld a5, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    ld a5, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a5
+; RV64IM-NEXT:    slli t3, t3, 56
+; RV64IM-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a5
+; RV64IM-NEXT:    xor a0, a7, a0
+; RV64IM-NEXT:    ld t0, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a5, a6, t0
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    slli a5, a5, 40
+; RV64IM-NEXT:    xor a2, a4, a2
+; RV64IM-NEXT:    or a4, t3, a5
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a5, a2, a7
+; RV64IM-NEXT:    xor a3, a2, a3
+; RV64IM-NEXT:    srli a2, a2, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    ld a6, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a2, a2, a6
+; RV64IM-NEXT:    srli a3, a3, 24
+; RV64IM-NEXT:    srliw a6, a1, 24
+; RV64IM-NEXT:    and a3, a3, a7
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a6, a6, 32
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    and a1, a7, t0
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a5, a6
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 496
+; RV64IM-NEXT:    ret
+  %a.rev = call i4 @llvm.bitreverse.i4(i4 %a)
+  %b.rev = call i4 @llvm.bitreverse.i4(i4 %b)
+  %res.rev = call i4 @llvm.clmul.i4(i4 %a.rev, i4 %b.rev)
+  %res = call i4 @llvm.bitreverse.i4(i4 %res.rev)
+  ret i4 %res
+}
+
+define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
+; RV32IM-LABEL: clmulr_i8:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui t2, 61681
+; RV32IM-NEXT:    lui t3, 209715
+; RV32IM-NEXT:    lui a7, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t5, a1, 24
+; RV32IM-NEXT:    slli a4, a1, 24
+; RV32IM-NEXT:    li t6, 1
+; RV32IM-NEXT:    lui s0, 4
+; RV32IM-NEXT:    lui s1, 8
+; RV32IM-NEXT:    lui s2, 32
+; RV32IM-NEXT:    lui s3, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    lui s11, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi s4, a3, -256
+; RV32IM-NEXT:    lui a5, 16
+; RV32IM-NEXT:    and t0, t0, s4
+; RV32IM-NEXT:    or a3, t0, t1
+; RV32IM-NEXT:    lui t0, 32768
+; RV32IM-NEXT:    and t1, t4, s4
+; RV32IM-NEXT:    or t4, t1, t5
+; RV32IM-NEXT:    lui a6, 65536
+; RV32IM-NEXT:    and a0, a0, s4
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or t5, a2, a0
+; RV32IM-NEXT:    lui a2, 131072
+; RV32IM-NEXT:    and a1, a1, s4
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a4, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    addi t2, t2, -241
+; RV32IM-NEXT:    addi t3, t3, 819
+; RV32IM-NEXT:    addi a7, a7, 1365
+; RV32IM-NEXT:    or a3, t5, a3
+; RV32IM-NEXT:    or a0, a0, t4
+; RV32IM-NEXT:    srli t4, a3, 4
+; RV32IM-NEXT:    and a3, a3, t2
+; RV32IM-NEXT:    srli t5, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and t4, t4, t2
+; RV32IM-NEXT:    slli a3, a3, 4
+; RV32IM-NEXT:    and t5, t5, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 2
+; RV32IM-NEXT:    and a3, a3, t3
+; RV32IM-NEXT:    srli t5, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a3, a3, 2
+; RV32IM-NEXT:    and t5, t5, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 1
+; RV32IM-NEXT:    and a3, a3, a7
+; RV32IM-NEXT:    srli t5, a0, 1
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and t4, t4, a7
+; RV32IM-NEXT:    and a7, t5, a7
+; RV32IM-NEXT:    lui a4, 524288
+; RV32IM-NEXT:    slli t6, t6, 11
+; RV32IM-NEXT:    slli a3, a3, 1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, a7, a0
+; RV32IM-NEXT:    andi t5, a0, 2
+; RV32IM-NEXT:    andi t4, a0, 1
+; RV32IM-NEXT:    and t6, a0, t6
+; RV32IM-NEXT:    lui a7, 1
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a7, 2
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    sw s0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a0, s2
+; RV32IM-NEXT:    and a5, a0, s3
+; RV32IM-NEXT:    sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s5
+; RV32IM-NEXT:    sw a5, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s6
+; RV32IM-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s7, a0, s7
+; RV32IM-NEXT:    and s8, a0, s8
+; RV32IM-NEXT:    and a5, a0, s9
+; RV32IM-NEXT:    sw a5, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s10
+; RV32IM-NEXT:    sw a5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s11
+; RV32IM-NEXT:    sw a5, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, ra
+; RV32IM-NEXT:    sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, t0
+; RV32IM-NEXT:    sw a5, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a6
+; RV32IM-NEXT:    sw a5, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, a4
+; RV32IM-NEXT:    sw a4, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 4
+; RV32IM-NEXT:    andi a2, a0, 8
+; RV32IM-NEXT:    andi a4, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a6, a0, 64
+; RV32IM-NEXT:    andi a7, a0, 128
+; RV32IM-NEXT:    andi t0, a0, 256
+; RV32IM-NEXT:    andi t1, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul t5, a3, t5
+; RV32IM-NEXT:    sw t5, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, a3, t4
+; RV32IM-NEXT:    mul a1, a3, a1
+; RV32IM-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s6, a3, a2
+; RV32IM-NEXT:    mul s5, a3, a4
+; RV32IM-NEXT:    mul s3, a3, a5
+; RV32IM-NEXT:    mul a1, a3, a6
+; RV32IM-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a3, a7
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s0, a3, t0
+; RV32IM-NEXT:    mul t5, a3, t1
+; RV32IM-NEXT:    mul s11, a3, a0
+; RV32IM-NEXT:    mul a0, a3, t6
+; RV32IM-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a3, a0
+; RV32IM-NEXT:    mul s1, a3, s1
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, a3, a0
+; RV32IM-NEXT:    mul a0, a3, s2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a3, a0
+; RV32IM-NEXT:    mul a6, a3, s7
+; RV32IM-NEXT:    mul t4, a3, s8
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a5
+; RV32IM-NEXT:    lw t0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, t0
+; RV32IM-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, t6
+; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, s2
+; RV32IM-NEXT:    lw s8, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a3, s8
+; RV32IM-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, s9
+; RV32IM-NEXT:    lw s9, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s10, s10, s9
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor s3, s5, s3
+; RV32IM-NEXT:    xor t5, s0, t5
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s10, s6
+; RV32IM-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, s3, a4
+; RV32IM-NEXT:    xor t1, t5, s11
+; RV32IM-NEXT:    xor a7, a7, s1
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a4
+; RV32IM-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, t1, a4
+; RV32IM-NEXT:    xor a5, a7, ra
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a6
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s7
+; RV32IM-NEXT:    xor a0, a0, t6
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s2
+; RV32IM-NEXT:    xor a4, a1, a4
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a0, a0, s8
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a0, a3
+; RV32IM-NEXT:    and a3, a2, s4
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s4
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and a1, a1, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and a1, a1, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    andi a1, a0, 85
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    andi a0, a0, 85
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -480
+; RV64IM-NEXT:    sd ra, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a4, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui s8, 16
+; RV64IM-NEXT:    srli t1, a0, 56
+; RV64IM-NEXT:    srliw t3, a0, 24
+; RV64IM-NEXT:    slli t4, a0, 56
+; RV64IM-NEXT:    lui s3, 61681
+; RV64IM-NEXT:    lui t5, 209715
+; RV64IM-NEXT:    lui s6, 349525
+; RV64IM-NEXT:    srli s9, a1, 24
+; RV64IM-NEXT:    srli s0, a1, 8
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    srli t2, a1, 56
+; RV64IM-NEXT:    srliw s11, a1, 24
+; RV64IM-NEXT:    slli a3, a1, 56
+; RV64IM-NEXT:    li t0, 1
+; RV64IM-NEXT:    lui s1, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui t6, 4096
+; RV64IM-NEXT:    lui s5, 8192
+; RV64IM-NEXT:    lui s7, 4080
+; RV64IM-NEXT:    and a2, a4, s7
+; RV64IM-NEXT:    slli ra, s4, 24
+; RV64IM-NEXT:    addi s10, s8, -256
+; RV64IM-NEXT:    and a4, a6, ra
+; RV64IM-NEXT:    sd ra, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a4, a2
+; RV64IM-NEXT:    and a4, a0, s7
+; RV64IM-NEXT:    slli t3, t3, 32
+; RV64IM-NEXT:    addi s3, s3, -241
+; RV64IM-NEXT:    addi s4, t5, 819
+; RV64IM-NEXT:    addi s6, s6, 1365
+; RV64IM-NEXT:    and a6, s9, s7
+; RV64IM-NEXT:    and a5, a5, s10
+; RV64IM-NEXT:    or a5, a5, t1
+; RV64IM-NEXT:    and t1, a1, s7
+; RV64IM-NEXT:    slli t5, s11, 32
+; RV64IM-NEXT:    slli a4, a4, 24
+; RV64IM-NEXT:    or s9, a4, t3
+; RV64IM-NEXT:    slli a4, s3, 32
+; RV64IM-NEXT:    add s3, s3, a4
+; RV64IM-NEXT:    slli a4, s4, 32
+; RV64IM-NEXT:    add s4, s4, a4
+; RV64IM-NEXT:    slli a4, s6, 32
+; RV64IM-NEXT:    add s6, s6, a4
+; RV64IM-NEXT:    slli t3, t0, 11
+; RV64IM-NEXT:    and a4, s0, ra
+; RV64IM-NEXT:    or a4, a4, a6
+; RV64IM-NEXT:    slli s11, t0, 32
+; RV64IM-NEXT:    and a6, a7, s10
+; RV64IM-NEXT:    or a6, a6, t2
+; RV64IM-NEXT:    slli ra, t0, 33
+; RV64IM-NEXT:    slli t1, t1, 24
+; RV64IM-NEXT:    or a7, t1, t5
+; RV64IM-NEXT:    slli s0, t0, 34
+; RV64IM-NEXT:    or a2, a2, a5
+; RV64IM-NEXT:    slli a5, t0, 35
+; RV64IM-NEXT:    sd a5, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s10
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, t4, a0
+; RV64IM-NEXT:    slli a5, t0, 36
+; RV64IM-NEXT:    sd a5, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a4, a4, a6
+; RV64IM-NEXT:    slli a6, t0, 37
+; RV64IM-NEXT:    and a1, a1, s10
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    slli a3, t0, 38
+; RV64IM-NEXT:    sd a3, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, a7
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a4
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s3
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s3
+; RV64IM-NEXT:    and a2, a2, s3
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s3
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s6, a3, a1
+; RV64IM-NEXT:    andi a1, s6, 2
+; RV64IM-NEXT:    andi a2, s6, 1
+; RV64IM-NEXT:    andi a3, s6, 4
+; RV64IM-NEXT:    andi a4, s6, 8
+; RV64IM-NEXT:    andi a5, s6, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s6, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a7, t0, 39
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 40
+; RV64IM-NEXT:    and a2, s6, s1
+; RV64IM-NEXT:    and a3, s6, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 41
+; RV64IM-NEXT:    and a3, s6, t6
+; RV64IM-NEXT:    and a4, s6, s5
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t0, 48
+; RV64IM-NEXT:    and a4, s6, s11
+; RV64IM-NEXT:    and a5, s6, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t0, 49
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 56
+; RV64IM-NEXT:    and a2, s6, a3
+; RV64IM-NEXT:    and a3, s6, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 57
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 42
+; RV64IM-NEXT:    slli ra, t0, 43
+; RV64IM-NEXT:    slli a4, t0, 44
+; RV64IM-NEXT:    slli t6, t0, 45
+; RV64IM-NEXT:    slli s1, t0, 46
+; RV64IM-NEXT:    slli s2, t0, 47
+; RV64IM-NEXT:    slli s3, t0, 50
+; RV64IM-NEXT:    slli s4, t0, 51
+; RV64IM-NEXT:    slli s5, t0, 52
+; RV64IM-NEXT:    slli a1, t0, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 59
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 60
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 61
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t0, t0, 62
+; RV64IM-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s6, s9
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s6, s11
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 2048
+; RV64IM-NEXT:    and a1, s6, s7
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t1, 16384
+; RV64IM-NEXT:    and a1, s6, t1
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 32768
+; RV64IM-NEXT:    and t2, s6, t2
+; RV64IM-NEXT:    lui t3, 65536
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 131072
+; RV64IM-NEXT:    and a5, s6, t4
+; RV64IM-NEXT:    lui t5, 262144
+; RV64IM-NEXT:    and t0, s6, t5
+; RV64IM-NEXT:    and s11, s6, s0
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a6
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a7
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a2
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s6, ra
+; RV64IM-NEXT:    and a1, s6, a4
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t6
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s3
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s6, s4
+; RV64IM-NEXT:    and s1, s6, s5
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s6, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s6, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s6, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, s6, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s6, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s6, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s6, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s6, a1
+; RV64IM-NEXT:    andi a1, s6, 64
+; RV64IM-NEXT:    andi a2, s6, 128
+; RV64IM-NEXT:    andi a3, s6, 1024
+; RV64IM-NEXT:    srliw a4, s6, 31
+; RV64IM-NEXT:    srli s6, s6, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t3, a0, a3
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    mul t5, a0, t2
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a3, a0, s11
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    mul a5, a0, ra
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    sd s4, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s6, s6, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s6
+; RV64IM-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s6, s4
+; RV64IM-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, s4, t4
+; RV64IM-NEXT:    ld s4, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, s4, t3
+; RV64IM-NEXT:    ld s4, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s4, t1
+; RV64IM-NEXT:    ld s4, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, s4, a7
+; RV64IM-NEXT:    ld s4, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, s4, a6
+; RV64IM-NEXT:    ld s4, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, s4, a3
+; RV64IM-NEXT:    ld s4, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, s4, a2
+; RV64IM-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, s4, a1
+; RV64IM-NEXT:    ld s4, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, s4, s5
+; RV64IM-NEXT:    xor t4, s6, t4
+; RV64IM-NEXT:    ld s4, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, s4
+; RV64IM-NEXT:    ld s4, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, s4
+; RV64IM-NEXT:    xor a7, a7, t6
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a3, a3, t0
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a5, s5, s7
+; RV64IM-NEXT:    ld t0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t4, t0
+; RV64IM-NEXT:    ld t4, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, t4
+; RV64IM-NEXT:    ld t4, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, t4
+; RV64IM-NEXT:    ld t4, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    ld t4, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t4
+; RV64IM-NEXT:    ld t4, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t4
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a5, a5, s8
+; RV64IM-NEXT:    ld t2, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t2
+; RV64IM-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a5, a5, s9
+; RV64IM-NEXT:    xor t2, t0, t3
+; RV64IM-NEXT:    xor t1, t2, t1
+; RV64IM-NEXT:    ld t2, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, t2
+; RV64IM-NEXT:    ld t2, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, t2
+; RV64IM-NEXT:    xor a2, a2, ra
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a5, a5, s10
+; RV64IM-NEXT:    xor a7, t1, a7
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    ld a6, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a6
+; RV64IM-NEXT:    ld a6, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a6
+; RV64IM-NEXT:    slli t0, t0, 56
+; RV64IM-NEXT:    ld a6, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a6
+; RV64IM-NEXT:    xor a0, a5, a0
+; RV64IM-NEXT:    ld t1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a5, a7, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    slli a5, a5, 40
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    or a4, t0, a5
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a5, a3, a7
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    srli a3, a3, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a6, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, a3, a6
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a6, a1, 24
+; RV64IM-NEXT:    and a2, a2, a7
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a6, a6, 32
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a1, a7, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a5, a6
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    andi a1, a0, 85
+; RV64IM-NEXT:    srli a0, a0, 1
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    andi a0, a0, 85
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    ld ra, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 480
+; RV64IM-NEXT:    ret
+  %a.ext = zext i8 %a to i16
+  %b.ext = zext i8 %b to i16
+  %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext)
+  %res.ext = lshr i16 %clmul, 7
+  %res = trunc i16 %res.ext to i8
+  ret i8 %res
+}
+
+define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
+; RV32IM-LABEL: clmulr_i16:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui ra, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui s6, 61681
+; RV32IM-NEXT:    lui t3, 209715
+; RV32IM-NEXT:    lui a4, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t5, a1, 24
+; RV32IM-NEXT:    slli a5, a1, 24
+; RV32IM-NEXT:    li t6, 1
+; RV32IM-NEXT:    lui a7, 2
+; RV32IM-NEXT:    lui a6, 4
+; RV32IM-NEXT:    lui t2, 8
+; RV32IM-NEXT:    lui s1, 32
+; RV32IM-NEXT:    lui s0, 64
+; RV32IM-NEXT:    lui s3, 128
+; RV32IM-NEXT:    lui s4, 256
+; RV32IM-NEXT:    lui s5, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s7, 2048
+; RV32IM-NEXT:    lui s9, 4096
+; RV32IM-NEXT:    lui s10, 8192
+; RV32IM-NEXT:    lui s11, 16384
+; RV32IM-NEXT:    addi s2, ra, -256
+; RV32IM-NEXT:    sw s2, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t0, t0, s2
+; RV32IM-NEXT:    or t1, t0, t1
+; RV32IM-NEXT:    lui a3, 32768
+; RV32IM-NEXT:    and t4, t4, s2
+; RV32IM-NEXT:    or t5, t4, t5
+; RV32IM-NEXT:    lui t0, 65536
+; RV32IM-NEXT:    and a0, a0, s2
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a2, a2, a0
+; RV32IM-NEXT:    lui t4, 131072
+; RV32IM-NEXT:    and a1, a1, s2
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a5, a1
+; RV32IM-NEXT:    lui a5, 262144
+; RV32IM-NEXT:    addi s2, s6, -241
+; RV32IM-NEXT:    addi s6, t3, 819
+; RV32IM-NEXT:    addi a4, a4, 1365
+; RV32IM-NEXT:    or a2, a2, t1
+; RV32IM-NEXT:    or a0, a0, t5
+; RV32IM-NEXT:    srli t1, a2, 4
+; RV32IM-NEXT:    and a2, a2, s2
+; RV32IM-NEXT:    srli t5, a0, 4
+; RV32IM-NEXT:    and a0, a0, s2
+; RV32IM-NEXT:    and t1, t1, s2
+; RV32IM-NEXT:    slli a2, a2, 4
+; RV32IM-NEXT:    and t5, t5, s2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a2, t1, a2
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t1, a2, 2
+; RV32IM-NEXT:    and a2, a2, s6
+; RV32IM-NEXT:    srli t5, a0, 2
+; RV32IM-NEXT:    and a0, a0, s6
+; RV32IM-NEXT:    and t1, t1, s6
+; RV32IM-NEXT:    slli a2, a2, 2
+; RV32IM-NEXT:    and t5, t5, s6
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a2, t1, a2
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t1, a2, 1
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    srli t5, a0, 1
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and t1, t1, a4
+; RV32IM-NEXT:    and t5, t5, a4
+; RV32IM-NEXT:    lui a1, 524288
+; RV32IM-NEXT:    slli t6, t6, 11
+; RV32IM-NEXT:    slli a2, a2, 1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a4, t1, a2
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    andi t3, a0, 2
+; RV32IM-NEXT:    andi t5, a0, 1
+; RV32IM-NEXT:    and t6, a0, t6
+; RV32IM-NEXT:    lui a2, 1
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a7
+; RV32IM-NEXT:    sw a2, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a6
+; RV32IM-NEXT:    sw a2, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t2, a0, t2
+; RV32IM-NEXT:    and ra, a0, ra
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    and s3, a0, s3
+; RV32IM-NEXT:    and a2, a0, s4
+; RV32IM-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s5
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s8
+; RV32IM-NEXT:    sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s7
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s9
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s10
+; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s11
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a3
+; RV32IM-NEXT:    sw a3, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, t0
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, t4
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 4
+; RV32IM-NEXT:    andi a2, a0, 8
+; RV32IM-NEXT:    andi a3, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a6, a0, 64
+; RV32IM-NEXT:    andi a7, a0, 128
+; RV32IM-NEXT:    andi t0, a0, 256
+; RV32IM-NEXT:    andi t1, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul s11, a4, t3
+; RV32IM-NEXT:    mul s9, a4, t5
+; RV32IM-NEXT:    mul s8, a4, a1
+; RV32IM-NEXT:    mul s4, a4, a2
+; RV32IM-NEXT:    mul s5, a4, a3
+; RV32IM-NEXT:    mul s1, a4, a5
+; RV32IM-NEXT:    mul a1, a4, a6
+; RV32IM-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a4, a7
+; RV32IM-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t5, a4, t0
+; RV32IM-NEXT:    mul t3, a4, t1
+; RV32IM-NEXT:    mul s10, a4, a0
+; RV32IM-NEXT:    mul a0, a4, t6
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a4, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a4, a0
+; RV32IM-NEXT:    mul t6, a4, t2
+; RV32IM-NEXT:    mul s7, a4, ra
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a4, s0
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a3, a4, s3
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a4, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a6, a4, a0
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t2, a4, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a4, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a4, a0
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    lw a5, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a4, a5
+; RV32IM-NEXT:    lw t0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a4, t0
+; RV32IM-NEXT:    lw t4, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a4, t4
+; RV32IM-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a4, s0
+; RV32IM-NEXT:    lw ra, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, a4, ra
+; RV32IM-NEXT:    sw ra, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a4, ra
+; RV32IM-NEXT:    xor s9, s9, s11
+; RV32IM-NEXT:    xor s4, s8, s4
+; RV32IM-NEXT:    xor s1, s5, s1
+; RV32IM-NEXT:    xor t3, t5, t3
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s9, s4
+; RV32IM-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, s1, a3
+; RV32IM-NEXT:    xor t1, t3, s10
+; RV32IM-NEXT:    xor a7, a7, t6
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, t1, a3
+; RV32IM-NEXT:    xor a5, a7, s7
+; RV32IM-NEXT:    xor a2, a2, t2
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a6
+; RV32IM-NEXT:    lw a6, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s3
+; RV32IM-NEXT:    xor a0, a0, t4
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    xor a3, a1, a3
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    lui a5, 5
+; RV32IM-NEXT:    addi a5, a5, 1365
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a3, a2, a6
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, a6
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, s2
+; RV32IM-NEXT:    and a1, a1, s2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, s6
+; RV32IM-NEXT:    and a1, a1, s6
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    and a0, a0, a5
+; RV32IM-NEXT:    and a1, a1, a5
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -480
+; RV64IM-NEXT:    sd ra, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli a7, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a4, a0, 40
+; RV64IM-NEXT:    lui s10, 16
+; RV64IM-NEXT:    srli t1, a0, 56
+; RV64IM-NEXT:    srliw t4, a0, 24
+; RV64IM-NEXT:    slli a5, a0, 56
+; RV64IM-NEXT:    lui s3, 61681
+; RV64IM-NEXT:    lui t5, 209715
+; RV64IM-NEXT:    lui s6, 349525
+; RV64IM-NEXT:    srli s9, a1, 24
+; RV64IM-NEXT:    srli s0, a1, 8
+; RV64IM-NEXT:    srli ra, a1, 40
+; RV64IM-NEXT:    srli t2, a1, 56
+; RV64IM-NEXT:    srliw s11, a1, 24
+; RV64IM-NEXT:    slli a6, a1, 56
+; RV64IM-NEXT:    li t0, 1
+; RV64IM-NEXT:    lui s1, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui t6, 4096
+; RV64IM-NEXT:    lui s5, 8192
+; RV64IM-NEXT:    lui s7, 4080
+; RV64IM-NEXT:    and a2, a3, s7
+; RV64IM-NEXT:    slli t3, s4, 24
+; RV64IM-NEXT:    addi s8, s10, -256
+; RV64IM-NEXT:    and a3, a7, t3
+; RV64IM-NEXT:    sd t3, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a3, a0, s7
+; RV64IM-NEXT:    slli t4, t4, 32
+; RV64IM-NEXT:    addi s3, s3, -241
+; RV64IM-NEXT:    addi s4, t5, 819
+; RV64IM-NEXT:    addi s6, s6, 1365
+; RV64IM-NEXT:    and a7, s9, s7
+; RV64IM-NEXT:    and a4, a4, s8
+; RV64IM-NEXT:    or a4, a4, t1
+; RV64IM-NEXT:    and t1, a1, s7
+; RV64IM-NEXT:    slli t5, s11, 32
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    or s9, a3, t4
+; RV64IM-NEXT:    slli a3, s3, 32
+; RV64IM-NEXT:    add s3, s3, a3
+; RV64IM-NEXT:    slli a3, s4, 32
+; RV64IM-NEXT:    add s4, s4, a3
+; RV64IM-NEXT:    slli a3, s6, 32
+; RV64IM-NEXT:    add s6, s6, a3
+; RV64IM-NEXT:    slli t4, t0, 11
+; RV64IM-NEXT:    and a3, s0, t3
+; RV64IM-NEXT:    or a3, a3, a7
+; RV64IM-NEXT:    slli s11, t0, 32
+; RV64IM-NEXT:    and a7, ra, s8
+; RV64IM-NEXT:    or a7, a7, t2
+; RV64IM-NEXT:    slli ra, t0, 33
+; RV64IM-NEXT:    slli t1, t1, 24
+; RV64IM-NEXT:    or t1, t1, t5
+; RV64IM-NEXT:    slli s0, t0, 34
+; RV64IM-NEXT:    or a2, a2, a4
+; RV64IM-NEXT:    slli a4, t0, 35
+; RV64IM-NEXT:    sd a4, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s8
+; RV64IM-NEXT:    sd s8, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a5, a0
+; RV64IM-NEXT:    slli a4, t0, 36
+; RV64IM-NEXT:    sd a4, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a3, a3, a7
+; RV64IM-NEXT:    slli a7, t0, 37
+; RV64IM-NEXT:    and a1, a1, s8
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a6, a1
+; RV64IM-NEXT:    slli a6, t0, 38
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, t1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a3
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s3
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s3
+; RV64IM-NEXT:    and a2, a2, s3
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s3
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s6, a3, a1
+; RV64IM-NEXT:    andi a1, s6, 2
+; RV64IM-NEXT:    andi a2, s6, 1
+; RV64IM-NEXT:    andi a3, s6, 4
+; RV64IM-NEXT:    andi a4, s6, 8
+; RV64IM-NEXT:    andi a5, s6, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s6, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s3, t0, 39
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 40
+; RV64IM-NEXT:    and a2, s6, s1
+; RV64IM-NEXT:    and a3, s6, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 41
+; RV64IM-NEXT:    and a3, s6, t6
+; RV64IM-NEXT:    and a4, s6, s5
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t0, 48
+; RV64IM-NEXT:    and a4, s6, s11
+; RV64IM-NEXT:    and a5, s6, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t0, 49
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 56
+; RV64IM-NEXT:    and a2, s6, a3
+; RV64IM-NEXT:    and a3, s6, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 57
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 42
+; RV64IM-NEXT:    slli ra, t0, 43
+; RV64IM-NEXT:    slli a4, t0, 44
+; RV64IM-NEXT:    slli t6, t0, 45
+; RV64IM-NEXT:    slli s1, t0, 46
+; RV64IM-NEXT:    slli s2, t0, 47
+; RV64IM-NEXT:    slli s4, t0, 50
+; RV64IM-NEXT:    slli s5, t0, 51
+; RV64IM-NEXT:    slli a1, t0, 52
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 59
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 60
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 61
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t0, t0, 62
+; RV64IM-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t4
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s6, s9
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s6, s11
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 2048
+; RV64IM-NEXT:    and a1, s6, s7
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t1, 16384
+; RV64IM-NEXT:    and a1, s6, t1
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 32768
+; RV64IM-NEXT:    and t2, s6, t2
+; RV64IM-NEXT:    lui t3, 65536
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 131072
+; RV64IM-NEXT:    and a5, s6, t4
+; RV64IM-NEXT:    lui t5, 262144
+; RV64IM-NEXT:    and t0, s6, t5
+; RV64IM-NEXT:    and s11, s6, s0
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a7
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a6
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s3
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a2
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s6, ra
+; RV64IM-NEXT:    and a1, s6, a4
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t6
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s4
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s6, s5
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, s6, a1
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s6, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s6, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s6, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, s6, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s6, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s6, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s6, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s6, a1
+; RV64IM-NEXT:    andi a1, s6, 64
+; RV64IM-NEXT:    andi a2, s6, 128
+; RV64IM-NEXT:    andi a3, s6, 1024
+; RV64IM-NEXT:    srliw a4, s6, 31
+; RV64IM-NEXT:    srli s6, s6, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t3, a0, a3
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    mul t5, a0, t2
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a3, a0, s11
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    mul a5, a0, ra
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s6, s6, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s6
+; RV64IM-NEXT:    sd a0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s6, a0
+; RV64IM-NEXT:    ld a0, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, a0, t4
+; RV64IM-NEXT:    ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, a0, t3
+; RV64IM-NEXT:    ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, a0, t1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a0, a7
+; RV64IM-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a0, a6
+; RV64IM-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a0, a3
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a0, a2
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, a0, s5
+; RV64IM-NEXT:    xor t4, s6, t4
+; RV64IM-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    xor a7, a7, t6
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a3, a3, t0
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a5, s5, s7
+; RV64IM-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t4, a0
+; RV64IM-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a5, a5, s8
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a5, a5, s9
+; RV64IM-NEXT:    xor t2, t0, t3
+; RV64IM-NEXT:    xor t1, t2, t1
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, ra
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a5, a5, s10
+; RV64IM-NEXT:    xor a7, t1, a7
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a1, a1, s4
+; RV64IM-NEXT:    lui a6, 5
+; RV64IM-NEXT:    addi a6, a6, 1365
+; RV64IM-NEXT:    slli t0, t0, 56
+; RV64IM-NEXT:    ld a0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a5, a0
+; RV64IM-NEXT:    ld t1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a5, a7, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    slli a5, a5, 40
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    or a4, t0, a5
+; RV64IM-NEXT:    lui t0, 4080
+; RV64IM-NEXT:    and a5, a3, t0
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    srli a3, a3, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a7, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, a3, a7
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a7, a1, 24
+; RV64IM-NEXT:    and a2, a2, t0
+; RV64IM-NEXT:    srli t0, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a7, a7, 32
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a5, a7
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    and a0, a0, a6
+; RV64IM-NEXT:    and a1, a1, a6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 480
+; RV64IM-NEXT:    ret
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext)
+  %res.ext = lshr i32 %clmul, 15
+  %res = trunc i32 %res.ext to i16
+  ret i16 %res
+}
+
+define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
+; RV32IM-LABEL: clmulr_i32:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui t3, 61681
+; RV32IM-NEXT:    lui t5, 209715
+; RV32IM-NEXT:    lui t6, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli a4, a1, 24
+; RV32IM-NEXT:    slli a5, a1, 24
+; RV32IM-NEXT:    li s7, 1
+; RV32IM-NEXT:    lui t2, 4
+; RV32IM-NEXT:    lui s0, 8
+; RV32IM-NEXT:    lui s1, 32
+; RV32IM-NEXT:    lui s2, 64
+; RV32IM-NEXT:    lui s3, 128
+; RV32IM-NEXT:    lui s4, 256
+; RV32IM-NEXT:    lui s8, 512
+; RV32IM-NEXT:    lui a7, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    lui s11, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi s5, a3, -256
+; RV32IM-NEXT:    and t0, t0, s5
+; RV32IM-NEXT:    or t1, t0, t1
+; RV32IM-NEXT:    lui a6, 32768
+; RV32IM-NEXT:    and t4, t4, s5
+; RV32IM-NEXT:    or a4, t4, a4
+; RV32IM-NEXT:    lui t0, 65536
+; RV32IM-NEXT:    and a0, a0, s5
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    lui a2, 131072
+; RV32IM-NEXT:    and a1, a1, s5
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or t4, a5, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    or a0, a0, t1
+; RV32IM-NEXT:    lui a5, 524288
+; RV32IM-NEXT:    addi t3, t3, -241
+; RV32IM-NEXT:    addi t5, t5, 819
+; RV32IM-NEXT:    addi t6, t6, 1365
+; RV32IM-NEXT:    slli s7, s7, 11
+; RV32IM-NEXT:    or a4, t4, a4
+; RV32IM-NEXT:    srli t4, a0, 4
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, t4, a0
+; RV32IM-NEXT:    srli t4, a4, 4
+; RV32IM-NEXT:    and a4, a4, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a4, a4, 4
+; RV32IM-NEXT:    or a4, t4, a4
+; RV32IM-NEXT:    srli t4, a0, 2
+; RV32IM-NEXT:    and a0, a0, t5
+; RV32IM-NEXT:    and t4, t4, t5
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, t4, a0
+; RV32IM-NEXT:    srli t4, a4, 2
+; RV32IM-NEXT:    and a4, a4, t5
+; RV32IM-NEXT:    and t4, t4, t5
+; RV32IM-NEXT:    slli a4, a4, 2
+; RV32IM-NEXT:    or t4, t4, a4
+; RV32IM-NEXT:    srli a4, a0, 1
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    and a4, a4, t6
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a4, a4, a0
+; RV32IM-NEXT:    srli a0, t4, 1
+; RV32IM-NEXT:    and t4, t4, t6
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    slli t4, t4, 1
+; RV32IM-NEXT:    or a0, a0, t4
+; RV32IM-NEXT:    andi t4, a0, 2
+; RV32IM-NEXT:    and s6, a0, s7
+; RV32IM-NEXT:    lui t1, 1
+; RV32IM-NEXT:    and t1, a0, t1
+; RV32IM-NEXT:    sw t1, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui t1, 2
+; RV32IM-NEXT:    and t1, a0, t1
+; RV32IM-NEXT:    sw t1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t1, a0, t2
+; RV32IM-NEXT:    sw t1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    and a3, a0, a3
+; RV32IM-NEXT:    sw a3, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, s2
+; RV32IM-NEXT:    sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s3, a0, s3
+; RV32IM-NEXT:    and a3, a0, s4
+; RV32IM-NEXT:    sw a3, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, s8
+; RV32IM-NEXT:    sw a3, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a7
+; RV32IM-NEXT:    sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s9, a0, s9
+; RV32IM-NEXT:    and a3, a0, s10
+; RV32IM-NEXT:    sw a3, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, s11
+; RV32IM-NEXT:    sw a3, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, ra
+; RV32IM-NEXT:    sw a3, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a6
+; RV32IM-NEXT:    sw a3, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, t0
+; RV32IM-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 1
+; RV32IM-NEXT:    andi a2, a0, 4
+; RV32IM-NEXT:    andi a3, a0, 8
+; RV32IM-NEXT:    andi a5, a0, 16
+; RV32IM-NEXT:    andi a6, a0, 32
+; RV32IM-NEXT:    andi a7, a0, 64
+; RV32IM-NEXT:    andi t0, a0, 128
+; RV32IM-NEXT:    andi t1, a0, 256
+; RV32IM-NEXT:    andi t2, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul t4, a4, t4
+; RV32IM-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul ra, a4, a1
+; RV32IM-NEXT:    mul s11, a4, a2
+; RV32IM-NEXT:    mul s8, a4, a3
+; RV32IM-NEXT:    mul s7, a4, a5
+; RV32IM-NEXT:    mul s4, a4, a6
+; RV32IM-NEXT:    mul a1, a4, a7
+; RV32IM-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a4, t0
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s2, a4, t1
+; RV32IM-NEXT:    mul t2, a4, t2
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a4, s6
+; RV32IM-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a4, a0
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a4, a0
+; RV32IM-NEXT:    mul s1, a4, s0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a3, a4, s3
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a4, a0
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a6, a4, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a4, a0
+; RV32IM-NEXT:    mul s6, a4, s9
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a4, a0
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    lw a5, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a4, a5
+; RV32IM-NEXT:    lw t0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a4, t0
+; RV32IM-NEXT:    lw s0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a4, s0
+; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s3, a4, s3
+; RV32IM-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s9, a4, s9
+; RV32IM-NEXT:    lw s10, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a4, s10
+; RV32IM-NEXT:    lw s10, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor ra, ra, s10
+; RV32IM-NEXT:    xor s8, s11, s8
+; RV32IM-NEXT:    xor s4, s7, s4
+; RV32IM-NEXT:    xor t2, s2, t2
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, ra, s8
+; RV32IM-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, s4, a3
+; RV32IM-NEXT:    lw t1, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor t1, t2, t1
+; RV32IM-NEXT:    xor a7, a7, s1
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, t1, a3
+; RV32IM-NEXT:    lw a5, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a7, a5
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a6
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s6
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s3
+; RV32IM-NEXT:    xor a3, a1, a3
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    xor a0, a0, s9
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    and a3, a2, s5
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s5
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and a1, a1, t3
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, t5
+; RV32IM-NEXT:    and a1, a1, t5
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    and a0, a0, t6
+; RV32IM-NEXT:    and a1, a1, t6
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulr_i32:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -512
+; RV64IM-NEXT:    sd ra, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli a7, a0, 8
+; RV64IM-NEXT:    li s1, 255
+; RV64IM-NEXT:    srli a6, a0, 40
+; RV64IM-NEXT:    lui a4, 16
+; RV64IM-NEXT:    srli t2, a0, 56
+; RV64IM-NEXT:    srliw t3, a0, 24
+; RV64IM-NEXT:    slli a2, a0, 56
+; RV64IM-NEXT:    lui t4, 61681
+; RV64IM-NEXT:    lui t6, 209715
+; RV64IM-NEXT:    lui s9, 349525
+; RV64IM-NEXT:    srli s7, a1, 24
+; RV64IM-NEXT:    srli s5, a1, 8
+; RV64IM-NEXT:    srli t5, a1, 40
+; RV64IM-NEXT:    srli t0, a1, 56
+; RV64IM-NEXT:    srliw ra, a1, 24
+; RV64IM-NEXT:    slli a5, a1, 56
+; RV64IM-NEXT:    li t1, 1
+; RV64IM-NEXT:    lui s10, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui s3, 4096
+; RV64IM-NEXT:    lui s0, 8192
+; RV64IM-NEXT:    lui s8, 4080
+; RV64IM-NEXT:    and a3, a3, s8
+; RV64IM-NEXT:    slli s1, s1, 24
+; RV64IM-NEXT:    addi s11, a4, -256
+; RV64IM-NEXT:    and a7, a7, s1
+; RV64IM-NEXT:    sd s1, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a3, a7, a3
+; RV64IM-NEXT:    and a7, a0, s8
+; RV64IM-NEXT:    slli t3, t3, 32
+; RV64IM-NEXT:    addi s4, t4, -241
+; RV64IM-NEXT:    addi s6, t6, 819
+; RV64IM-NEXT:    addi a4, s9, 1365
+; RV64IM-NEXT:    and t4, s7, s8
+; RV64IM-NEXT:    and a6, a6, s11
+; RV64IM-NEXT:    or a6, a6, t2
+; RV64IM-NEXT:    and t2, a1, s8
+; RV64IM-NEXT:    slli t6, ra, 32
+; RV64IM-NEXT:    slli a7, a7, 24
+; RV64IM-NEXT:    or s9, a7, t3
+; RV64IM-NEXT:    slli a7, s4, 32
+; RV64IM-NEXT:    add s4, s4, a7
+; RV64IM-NEXT:    slli a7, s6, 32
+; RV64IM-NEXT:    add s6, s6, a7
+; RV64IM-NEXT:    slli s7, t1, 11
+; RV64IM-NEXT:    and a7, s5, s1
+; RV64IM-NEXT:    or a7, a7, t4
+; RV64IM-NEXT:    slli t4, t1, 32
+; RV64IM-NEXT:    and t3, t5, s11
+; RV64IM-NEXT:    or t0, t3, t0
+; RV64IM-NEXT:    slli ra, t1, 33
+; RV64IM-NEXT:    slli t2, t2, 24
+; RV64IM-NEXT:    or t2, t2, t6
+; RV64IM-NEXT:    slli s1, t1, 34
+; RV64IM-NEXT:    or a3, a3, a6
+; RV64IM-NEXT:    slli a6, t1, 35
+; RV64IM-NEXT:    sd a6, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s11
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    slli a2, t1, 36
+; RV64IM-NEXT:    sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a7, t0
+; RV64IM-NEXT:    slli a7, t1, 37
+; RV64IM-NEXT:    and a1, a1, s11
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a5, a1
+; RV64IM-NEXT:    sd a4, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a5, a4, 32
+; RV64IM-NEXT:    add a5, a4, a5
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, t2
+; RV64IM-NEXT:    or a0, a0, a3
+; RV64IM-NEXT:    or a1, a1, a2
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s4, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s6, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    and a2, a2, a5
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, a5
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s5, a3, a1
+; RV64IM-NEXT:    andi a1, s5, 2
+; RV64IM-NEXT:    andi a2, s5, 1
+; RV64IM-NEXT:    andi a3, s5, 4
+; RV64IM-NEXT:    andi a5, s5, 8
+; RV64IM-NEXT:    andi a6, s5, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s5, 256
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, t1, 38
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 40
+; RV64IM-NEXT:    and a2, s5, s10
+; RV64IM-NEXT:    and a3, s5, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 41
+; RV64IM-NEXT:    and a3, s5, s3
+; RV64IM-NEXT:    and a4, s5, s0
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t1, 48
+; RV64IM-NEXT:    and a4, s5, t4
+; RV64IM-NEXT:    and a5, s5, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t1, 49
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 56
+; RV64IM-NEXT:    and a2, s5, a3
+; RV64IM-NEXT:    and a3, s5, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 57
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli ra, t1, 39
+; RV64IM-NEXT:    slli a2, t1, 42
+; RV64IM-NEXT:    slli a4, t1, 43
+; RV64IM-NEXT:    slli s0, t1, 44
+; RV64IM-NEXT:    slli s2, t1, 45
+; RV64IM-NEXT:    slli s3, t1, 46
+; RV64IM-NEXT:    slli s6, t1, 47
+; RV64IM-NEXT:    slli a1, t1, 50
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 51
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 52
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 53
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 54
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 55
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 58
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 59
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 60
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 61
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, t1, 62
+; RV64IM-NEXT:    sd t1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, s5, s7
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a1, 16
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s5, s9
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s5, s11
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s5, s10
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s5, s8
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t0, 2048
+; RV64IM-NEXT:    and t0, s5, t0
+; RV64IM-NEXT:    lui t2, 16384
+; RV64IM-NEXT:    and t2, s5, t2
+; RV64IM-NEXT:    lui t3, 32768
+; RV64IM-NEXT:    and a1, s5, t3
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 65536
+; RV64IM-NEXT:    and a1, s5, t4
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t5, 131072
+; RV64IM-NEXT:    and a5, s5, t5
+; RV64IM-NEXT:    lui t6, 262144
+; RV64IM-NEXT:    and a6, s5, t6
+; RV64IM-NEXT:    and s11, s5, s1
+; RV64IM-NEXT:    ld a1, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t5, s5, a1
+; RV64IM-NEXT:    ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t6, s5, a1
+; RV64IM-NEXT:    and a1, s5, a7
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s4
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s5, ra
+; RV64IM-NEXT:    and a1, s5, a2
+; RV64IM-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a4
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s0
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s2
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s3
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s6
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s5, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, s5, a1
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s5, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s5, a1
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s5, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s5, a1
+; RV64IM-NEXT:    andi a1, s5, 64
+; RV64IM-NEXT:    andi a2, s5, 128
+; RV64IM-NEXT:    andi a3, s5, 1024
+; RV64IM-NEXT:    srliw a4, s5, 31
+; RV64IM-NEXT:    srli t3, s5, 63
+; RV64IM-NEXT:    mul s2, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, a3
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, a0, t2
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a6
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a5, a0, s11
+; RV64IM-NEXT:    mul t2, a0, t5
+; RV64IM-NEXT:    mul s11, a0, t6
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, ra
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s5, a0, a1
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    ld a3, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    ld t0, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld ra, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, ra
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    sd s4, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli t3, t3, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, t3
+; RV64IM-NEXT:    ld t3, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, s4
+; RV64IM-NEXT:    ld s4, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, s4, s2
+; RV64IM-NEXT:    ld s4, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s4, s0
+; RV64IM-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, s4, t4
+; RV64IM-NEXT:    ld s4, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, s4, t1
+; RV64IM-NEXT:    ld s4, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, s4, a7
+; RV64IM-NEXT:    ld s4, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, s4, a5
+; RV64IM-NEXT:    ld s4, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, s4, a2
+; RV64IM-NEXT:    ld s4, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, s4, a1
+; RV64IM-NEXT:    ld s4, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s4, s6
+; RV64IM-NEXT:    xor t3, t3, s2
+; RV64IM-NEXT:    ld s2, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, s2
+; RV64IM-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, t4, s2
+; RV64IM-NEXT:    xor t1, t1, s3
+; RV64IM-NEXT:    xor a7, a7, s1
+; RV64IM-NEXT:    xor a5, a5, t2
+; RV64IM-NEXT:    xor a2, a2, a6
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a3, s6, s7
+; RV64IM-NEXT:    ld a6, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, t3, a6
+; RV64IM-NEXT:    ld t2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s0, t2
+; RV64IM-NEXT:    ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t4, t3
+; RV64IM-NEXT:    ld t4, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, t4
+; RV64IM-NEXT:    ld t4, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t4
+; RV64IM-NEXT:    xor a5, a5, s11
+; RV64IM-NEXT:    xor a2, a2, t5
+; RV64IM-NEXT:    xor a1, a1, t0
+; RV64IM-NEXT:    xor a3, a3, s8
+; RV64IM-NEXT:    ld t0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t3, t0
+; RV64IM-NEXT:    ld t3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t3
+; RV64IM-NEXT:    ld t3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, t3
+; RV64IM-NEXT:    xor a2, a2, s5
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a3, a3, s9
+; RV64IM-NEXT:    xor t2, a6, t2
+; RV64IM-NEXT:    xor t0, t2, t0
+; RV64IM-NEXT:    ld t2, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, t2
+; RV64IM-NEXT:    ld t2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, t2
+; RV64IM-NEXT:    ld t2, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, ra
+; RV64IM-NEXT:    xor a3, a3, s10
+; RV64IM-NEXT:    xor t0, t0, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a7, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a7
+; RV64IM-NEXT:    ld a7, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a7
+; RV64IM-NEXT:    slli a6, a6, 56
+; RV64IM-NEXT:    ld a7, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a1, a7
+; RV64IM-NEXT:    xor a0, a3, a0
+; RV64IM-NEXT:    ld t1, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, t0, t1
+; RV64IM-NEXT:    xor a4, t0, a4
+; RV64IM-NEXT:    slli a3, a3, 40
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    or a3, a6, a3
+; RV64IM-NEXT:    lui a7, 4080
+; RV64IM-NEXT:    and a5, a4, a7
+; RV64IM-NEXT:    xor a2, a4, a2
+; RV64IM-NEXT:    srli a4, a4, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a6, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a4, a4, a6
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a6, a1, 24
+; RV64IM-NEXT:    and a2, a2, a7
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a6, a6, 32
+; RV64IM-NEXT:    or a2, a4, a2
+; RV64IM-NEXT:    and a1, a7, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a4, a5, a6
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a3, a4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    ld ra, 504(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 512
+; RV64IM-NEXT:    ret
+  %a.ext = zext i32 %a to i64
+  %b.ext = zext i32 %b to i64
+  %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext)
+  %res.ext = lshr i64 %clmul, 31
+  %res = trunc i64 %res.ext to i32
+  ret i32 %res
+}
+
+define i4 @clmulr_constfold_i4() nounwind {
+; CHECK-LABEL: clmulr_constfold_i4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+  %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2)
+  %res.ext = lshr i8 %clmul, 3
+  %res = trunc i8 %res.ext to i4
+  ret i4 %res
+}
+
+define i16 @clmulr_constfold_i16() nounwind {
+; CHECK-LABEL: clmulr_constfold_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    ret
+  %clmul = call i32 @llvm.clmul.i32(i32 -2, i32 -1)
+  %res.ext = lshr i32 %clmul, 15
+  %res = trunc i32 %res.ext to i16
+  ret i16 %res
+}
+
+define i4 @clmulh_i4(i4 %a, i4 %b) nounwind {
+; RV32IM-LABEL: clmulh_i4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui t2, 61681
+; RV32IM-NEXT:    lui t3, 209715
+; RV32IM-NEXT:    lui a7, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t5, a1, 24
+; RV32IM-NEXT:    slli a4, a1, 24
+; RV32IM-NEXT:    li t6, 1
+; RV32IM-NEXT:    lui s0, 4
+; RV32IM-NEXT:    lui s1, 8
+; RV32IM-NEXT:    lui s2, 32
+; RV32IM-NEXT:    lui s3, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    lui s11, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi s4, a3, -256
+; RV32IM-NEXT:    lui a5, 16
+; RV32IM-NEXT:    and t0, t0, s4
+; RV32IM-NEXT:    or a3, t0, t1
+; RV32IM-NEXT:    lui t0, 32768
+; RV32IM-NEXT:    and t1, t4, s4
+; RV32IM-NEXT:    or t4, t1, t5
+; RV32IM-NEXT:    lui a6, 65536
+; RV32IM-NEXT:    and a0, a0, s4
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or t5, a2, a0
+; RV32IM-NEXT:    lui a2, 131072
+; RV32IM-NEXT:    and a1, a1, s4
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a4, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    addi t2, t2, -241
+; RV32IM-NEXT:    addi t3, t3, 819
+; RV32IM-NEXT:    addi a7, a7, 1365
+; RV32IM-NEXT:    or a3, t5, a3
+; RV32IM-NEXT:    or a0, a0, t4
+; RV32IM-NEXT:    srli t4, a3, 4
+; RV32IM-NEXT:    and a3, a3, t2
+; RV32IM-NEXT:    srli t5, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and t4, t4, t2
+; RV32IM-NEXT:    slli a3, a3, 4
+; RV32IM-NEXT:    and t5, t5, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 2
+; RV32IM-NEXT:    and a3, a3, t3
+; RV32IM-NEXT:    srli t5, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a3, a3, 2
+; RV32IM-NEXT:    and t5, t5, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 1
+; RV32IM-NEXT:    and a3, a3, a7
+; RV32IM-NEXT:    srli t5, a0, 1
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and t4, t4, a7
+; RV32IM-NEXT:    and a7, t5, a7
+; RV32IM-NEXT:    lui a4, 524288
+; RV32IM-NEXT:    slli t6, t6, 11
+; RV32IM-NEXT:    slli a3, a3, 1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, a7, a0
+; RV32IM-NEXT:    andi t5, a0, 2
+; RV32IM-NEXT:    andi t4, a0, 1
+; RV32IM-NEXT:    and t6, a0, t6
+; RV32IM-NEXT:    lui a7, 1
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a7, 2
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    sw s0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a0, s2
+; RV32IM-NEXT:    and a5, a0, s3
+; RV32IM-NEXT:    sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s5
+; RV32IM-NEXT:    sw a5, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s6
+; RV32IM-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s7, a0, s7
+; RV32IM-NEXT:    and s8, a0, s8
+; RV32IM-NEXT:    and a5, a0, s9
+; RV32IM-NEXT:    sw a5, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s10
+; RV32IM-NEXT:    sw a5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s11
+; RV32IM-NEXT:    sw a5, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, ra
+; RV32IM-NEXT:    sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, t0
+; RV32IM-NEXT:    sw a5, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a6
+; RV32IM-NEXT:    sw a5, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, a4
+; RV32IM-NEXT:    sw a4, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 4
+; RV32IM-NEXT:    andi a2, a0, 8
+; RV32IM-NEXT:    andi a4, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a6, a0, 64
+; RV32IM-NEXT:    andi a7, a0, 128
+; RV32IM-NEXT:    andi t0, a0, 256
+; RV32IM-NEXT:    andi t1, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul t5, a3, t5
+; RV32IM-NEXT:    sw t5, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, a3, t4
+; RV32IM-NEXT:    mul a1, a3, a1
+; RV32IM-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s6, a3, a2
+; RV32IM-NEXT:    mul s5, a3, a4
+; RV32IM-NEXT:    mul s3, a3, a5
+; RV32IM-NEXT:    mul a1, a3, a6
+; RV32IM-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a3, a7
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s0, a3, t0
+; RV32IM-NEXT:    mul t5, a3, t1
+; RV32IM-NEXT:    mul s11, a3, a0
+; RV32IM-NEXT:    mul a0, a3, t6
+; RV32IM-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a3, a0
+; RV32IM-NEXT:    mul s1, a3, s1
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, a3, a0
+; RV32IM-NEXT:    mul a0, a3, s2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a3, a0
+; RV32IM-NEXT:    mul a6, a3, s7
+; RV32IM-NEXT:    mul t4, a3, s8
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a5
+; RV32IM-NEXT:    lw t0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, t0
+; RV32IM-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, t6
+; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, s2
+; RV32IM-NEXT:    lw s8, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a3, s8
+; RV32IM-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, s9
+; RV32IM-NEXT:    lw s9, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s10, s10, s9
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor s3, s5, s3
+; RV32IM-NEXT:    xor t5, s0, t5
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s10, s6
+; RV32IM-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, s3, a4
+; RV32IM-NEXT:    xor t1, t5, s11
+; RV32IM-NEXT:    xor a7, a7, s1
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a4
+; RV32IM-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, t1, a4
+; RV32IM-NEXT:    xor a5, a7, ra
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a6
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s7
+; RV32IM-NEXT:    xor a0, a0, t6
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s2
+; RV32IM-NEXT:    xor a4, a1, a4
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a0, a0, s8
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a0, a3
+; RV32IM-NEXT:    and a3, a2, s4
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s4
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and a1, a1, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and a1, a1, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    andi a1, a0, 5
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    andi a0, a0, 20
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulh_i4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -480
+; RV64IM-NEXT:    sd ra, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli a7, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a4, a0, 40
+; RV64IM-NEXT:    lui s10, 16
+; RV64IM-NEXT:    srli t1, a0, 56
+; RV64IM-NEXT:    srliw t4, a0, 24
+; RV64IM-NEXT:    slli a5, a0, 56
+; RV64IM-NEXT:    lui s3, 61681
+; RV64IM-NEXT:    lui t5, 209715
+; RV64IM-NEXT:    lui s6, 349525
+; RV64IM-NEXT:    srli s9, a1, 24
+; RV64IM-NEXT:    srli s0, a1, 8
+; RV64IM-NEXT:    srli ra, a1, 40
+; RV64IM-NEXT:    srli t2, a1, 56
+; RV64IM-NEXT:    srliw s11, a1, 24
+; RV64IM-NEXT:    slli a6, a1, 56
+; RV64IM-NEXT:    li t0, 1
+; RV64IM-NEXT:    lui s1, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui t6, 4096
+; RV64IM-NEXT:    lui s5, 8192
+; RV64IM-NEXT:    lui s7, 4080
+; RV64IM-NEXT:    and a2, a3, s7
+; RV64IM-NEXT:    slli t3, s4, 24
+; RV64IM-NEXT:    addi s8, s10, -256
+; RV64IM-NEXT:    and a3, a7, t3
+; RV64IM-NEXT:    sd t3, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a3, a0, s7
+; RV64IM-NEXT:    slli t4, t4, 32
+; RV64IM-NEXT:    addi s3, s3, -241
+; RV64IM-NEXT:    addi s4, t5, 819
+; RV64IM-NEXT:    addi s6, s6, 1365
+; RV64IM-NEXT:    and a7, s9, s7
+; RV64IM-NEXT:    and a4, a4, s8
+; RV64IM-NEXT:    or a4, a4, t1
+; RV64IM-NEXT:    and t1, a1, s7
+; RV64IM-NEXT:    slli t5, s11, 32
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    or s9, a3, t4
+; RV64IM-NEXT:    slli a3, s3, 32
+; RV64IM-NEXT:    add s3, s3, a3
+; RV64IM-NEXT:    slli a3, s4, 32
+; RV64IM-NEXT:    add s4, s4, a3
+; RV64IM-NEXT:    slli a3, s6, 32
+; RV64IM-NEXT:    add s6, s6, a3
+; RV64IM-NEXT:    slli t4, t0, 11
+; RV64IM-NEXT:    and a3, s0, t3
+; RV64IM-NEXT:    or a3, a3, a7
+; RV64IM-NEXT:    slli s11, t0, 32
+; RV64IM-NEXT:    and a7, ra, s8
+; RV64IM-NEXT:    or a7, a7, t2
+; RV64IM-NEXT:    slli ra, t0, 33
+; RV64IM-NEXT:    slli t1, t1, 24
+; RV64IM-NEXT:    or t1, t1, t5
+; RV64IM-NEXT:    slli s0, t0, 34
+; RV64IM-NEXT:    or a2, a2, a4
+; RV64IM-NEXT:    slli a4, t0, 35
+; RV64IM-NEXT:    sd a4, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s8
+; RV64IM-NEXT:    sd s8, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a5, a0
+; RV64IM-NEXT:    slli a4, t0, 36
+; RV64IM-NEXT:    sd a4, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a3, a3, a7
+; RV64IM-NEXT:    slli a7, t0, 37
+; RV64IM-NEXT:    and a1, a1, s8
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a6, a1
+; RV64IM-NEXT:    slli a6, t0, 38
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, t1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a3
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s3
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s3
+; RV64IM-NEXT:    and a2, a2, s3
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s3
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s6, a3, a1
+; RV64IM-NEXT:    andi a1, s6, 2
+; RV64IM-NEXT:    andi a2, s6, 1
+; RV64IM-NEXT:    andi a3, s6, 4
+; RV64IM-NEXT:    andi a4, s6, 8
+; RV64IM-NEXT:    andi a5, s6, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s6, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s3, t0, 39
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 40
+; RV64IM-NEXT:    and a2, s6, s1
+; RV64IM-NEXT:    and a3, s6, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 41
+; RV64IM-NEXT:    and a3, s6, t6
+; RV64IM-NEXT:    and a4, s6, s5
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t0, 48
+; RV64IM-NEXT:    and a4, s6, s11
+; RV64IM-NEXT:    and a5, s6, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t0, 49
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 56
+; RV64IM-NEXT:    and a2, s6, a3
+; RV64IM-NEXT:    and a3, s6, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 57
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 42
+; RV64IM-NEXT:    slli ra, t0, 43
+; RV64IM-NEXT:    slli a4, t0, 44
+; RV64IM-NEXT:    slli t6, t0, 45
+; RV64IM-NEXT:    slli s1, t0, 46
+; RV64IM-NEXT:    slli s2, t0, 47
+; RV64IM-NEXT:    slli s4, t0, 50
+; RV64IM-NEXT:    slli s5, t0, 51
+; RV64IM-NEXT:    slli a1, t0, 52
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 59
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 60
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 61
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t0, t0, 62
+; RV64IM-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t4
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s6, s9
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s6, s11
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 2048
+; RV64IM-NEXT:    and a1, s6, s7
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t1, 16384
+; RV64IM-NEXT:    and a1, s6, t1
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 32768
+; RV64IM-NEXT:    and t2, s6, t2
+; RV64IM-NEXT:    lui t3, 65536
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 131072
+; RV64IM-NEXT:    and a5, s6, t4
+; RV64IM-NEXT:    lui t5, 262144
+; RV64IM-NEXT:    and t0, s6, t5
+; RV64IM-NEXT:    and s11, s6, s0
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a7
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a6
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s3
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a2
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s6, ra
+; RV64IM-NEXT:    and a1, s6, a4
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t6
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s4
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s6, s5
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, s6, a1
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s6, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s6, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s6, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, s6, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s6, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s6, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s6, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s6, a1
+; RV64IM-NEXT:    andi a1, s6, 64
+; RV64IM-NEXT:    andi a2, s6, 128
+; RV64IM-NEXT:    andi a3, s6, 1024
+; RV64IM-NEXT:    srliw a4, s6, 31
+; RV64IM-NEXT:    srli s6, s6, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t3, a0, a3
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    mul t5, a0, t2
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a3, a0, s11
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    mul a5, a0, ra
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s6, s6, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s6
+; RV64IM-NEXT:    sd a0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s6, a0
+; RV64IM-NEXT:    ld a0, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, a0, t4
+; RV64IM-NEXT:    ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, a0, t3
+; RV64IM-NEXT:    ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, a0, t1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a0, a7
+; RV64IM-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a0, a6
+; RV64IM-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a0, a3
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a0, a2
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, a0, s5
+; RV64IM-NEXT:    xor t4, s6, t4
+; RV64IM-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    xor a7, a7, t6
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a3, a3, t0
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a5, s5, s7
+; RV64IM-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t4, a0
+; RV64IM-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a5, a5, s8
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a5, a5, s9
+; RV64IM-NEXT:    xor t2, t0, t3
+; RV64IM-NEXT:    xor t1, t2, t1
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, ra
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a5, a5, s10
+; RV64IM-NEXT:    xor a7, t1, a7
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a1, a1, s4
+; RV64IM-NEXT:    lui a6, %hi(.LCPI14_0)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI14_0)(a6)
+; RV64IM-NEXT:    slli t0, t0, 56
+; RV64IM-NEXT:    ld a0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a5, a0
+; RV64IM-NEXT:    ld t1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a5, a7, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    slli a5, a5, 40
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    or a4, t0, a5
+; RV64IM-NEXT:    lui t0, 4080
+; RV64IM-NEXT:    and a5, a3, t0
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    srli a3, a3, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a7, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, a3, a7
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a7, a1, 24
+; RV64IM-NEXT:    and a2, a2, t0
+; RV64IM-NEXT:    srli t0, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a7, a7, 32
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a5, a7
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    andi a0, a0, 5
+; RV64IM-NEXT:    and a1, a1, a6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    slli a0, a0, 59
+; RV64IM-NEXT:    srli a0, a0, 60
+; RV64IM-NEXT:    ld ra, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 480
+; RV64IM-NEXT:    ret
+  %a.ext = zext i4 %a to i8
+  %b.ext = zext i4 %b to i8
+  %clmul = call i8 @llvm.clmul.i8(i8 %a.ext, i8 %b.ext)
+  %res.ext = lshr i8 %clmul, 4
+  %res = trunc i8 %res.ext to i4
+  ret i4 %res
+}
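+
+; Hand-worked sanity check for the test above (a sketch, assuming the
+; documented llvm.clmul semantics): the i4 high half is obtained by widening
+; to i8, carry-less multiplying, and shifting the product right by 4. For
+; example, clmul(0b1011, 0b1101) over GF(2) is 0b01111111, so the returned
+; high nibble clmulh_i4(0b1011, 0b1101) is 0b0111.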
+
+define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind {
+; RV32IM-LABEL: clmulh_i4_bitreverse:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui s1, 61681
+; RV32IM-NEXT:    lui s3, 209715
+; RV32IM-NEXT:    lui a6, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t6, a1, 24
+; RV32IM-NEXT:    slli a4, a1, 24
+; RV32IM-NEXT:    li t3, 1
+; RV32IM-NEXT:    lui s11, 2
+; RV32IM-NEXT:    lui t2, 4
+; RV32IM-NEXT:    lui s10, 8
+; RV32IM-NEXT:    lui t5, 32
+; RV32IM-NEXT:    lui s0, 64
+; RV32IM-NEXT:    lui s2, 128
+; RV32IM-NEXT:    lui s4, 256
+; RV32IM-NEXT:    lui s5, 512
+; RV32IM-NEXT:    lui s6, 1024
+; RV32IM-NEXT:    lui s7, 2048
+; RV32IM-NEXT:    lui s8, 4096
+; RV32IM-NEXT:    lui s9, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi a3, a3, -256
+; RV32IM-NEXT:    lui a5, 16
+; RV32IM-NEXT:    and t0, t0, a3
+; RV32IM-NEXT:    or t1, t0, t1
+; RV32IM-NEXT:    lui a7, 32768
+; RV32IM-NEXT:    and t4, t4, a3
+; RV32IM-NEXT:    or t6, t4, t6
+; RV32IM-NEXT:    lui t0, 65536
+; RV32IM-NEXT:    and a0, a0, a3
+; RV32IM-NEXT:    mv t4, a3
+; RV32IM-NEXT:    sw a3, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a2, a2, a0
+; RV32IM-NEXT:    lui a3, 131072
+; RV32IM-NEXT:    and a1, a1, t4
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a4, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    addi s1, s1, -241
+; RV32IM-NEXT:    addi s3, s3, 819
+; RV32IM-NEXT:    or a2, a2, t1
+; RV32IM-NEXT:    addi a4, a6, 1365
+; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    or a0, a0, t6
+; RV32IM-NEXT:    srli a6, a2, 4
+; RV32IM-NEXT:    and a2, a2, s1
+; RV32IM-NEXT:    and a6, a6, s1
+; RV32IM-NEXT:    slli a2, a2, 4
+; RV32IM-NEXT:    or a2, a6, a2
+; RV32IM-NEXT:    srli a6, a0, 4
+; RV32IM-NEXT:    and a0, a0, s1
+; RV32IM-NEXT:    and a6, a6, s1
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a6, a0
+; RV32IM-NEXT:    srli a6, a2, 2
+; RV32IM-NEXT:    and a2, a2, s3
+; RV32IM-NEXT:    and a6, a6, s3
+; RV32IM-NEXT:    slli a2, a2, 2
+; RV32IM-NEXT:    or a2, a6, a2
+; RV32IM-NEXT:    srli a6, a0, 2
+; RV32IM-NEXT:    and a0, a0, s3
+; RV32IM-NEXT:    and a6, a6, s3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a6, a0
+; RV32IM-NEXT:    srli a6, a2, 1
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    and a6, a6, a4
+; RV32IM-NEXT:    slli a2, a2, 1
+; RV32IM-NEXT:    or a6, a6, a2
+; RV32IM-NEXT:    srli a2, a0, 1
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    lui a2, 524288
+; RV32IM-NEXT:    slli t3, t3, 11
+; RV32IM-NEXT:    and t3, a0, t3
+; RV32IM-NEXT:    lui a4, 1
+; RV32IM-NEXT:    and t4, a0, a4
+; RV32IM-NEXT:    and s11, a0, s11
+; RV32IM-NEXT:    and a4, a0, t2
+; RV32IM-NEXT:    sw a4, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s10
+; RV32IM-NEXT:    sw a4, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, t5
+; RV32IM-NEXT:    sw a4, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    and a4, a0, s2
+; RV32IM-NEXT:    sw a4, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s4, a0, s4
+; RV32IM-NEXT:    and a4, a0, s5
+; RV32IM-NEXT:    sw a4, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s6
+; RV32IM-NEXT:    sw a4, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s7
+; RV32IM-NEXT:    sw a4, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s8
+; RV32IM-NEXT:    sw a4, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s9
+; RV32IM-NEXT:    sw a4, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, ra
+; RV32IM-NEXT:    sw a4, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, a7
+; RV32IM-NEXT:    sw a4, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, t0
+; RV32IM-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a3
+; RV32IM-NEXT:    sw a3, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi ra, a0, 2
+; RV32IM-NEXT:    andi a1, a0, 1
+; RV32IM-NEXT:    andi a2, a0, 4
+; RV32IM-NEXT:    andi a3, a0, 8
+; RV32IM-NEXT:    andi a4, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a7, a0, 64
+; RV32IM-NEXT:    andi t0, a0, 128
+; RV32IM-NEXT:    andi t1, a0, 256
+; RV32IM-NEXT:    andi t2, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul ra, a6, ra
+; RV32IM-NEXT:    mul s10, a6, a1
+; RV32IM-NEXT:    mul s9, a6, a2
+; RV32IM-NEXT:    mul s5, a6, a3
+; RV32IM-NEXT:    mul s6, a6, a4
+; RV32IM-NEXT:    mul s2, a6, a5
+; RV32IM-NEXT:    mul a1, a6, a7
+; RV32IM-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a6, t0
+; RV32IM-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t6, a6, t1
+; RV32IM-NEXT:    mul t2, a6, t2
+; RV32IM-NEXT:    mul s7, a6, a0
+; RV32IM-NEXT:    mul a0, a6, t3
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a6, t4
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t1, a6, s11
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a6, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t5, a6, a0
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a6, a0
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a6, a0
+; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a6, s0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a6, a0
+; RV32IM-NEXT:    mul a2, a6, s4
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a6, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a6, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a6, a0
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a6, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a6, a0
+; RV32IM-NEXT:    lw a4, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a6, a4
+; RV32IM-NEXT:    lw t0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a6, t0
+; RV32IM-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a6, t4
+; RV32IM-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a6, s0
+; RV32IM-NEXT:    lw s11, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, a6, s11
+; RV32IM-NEXT:    sw s11, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a6, a6, s11
+; RV32IM-NEXT:    xor s10, s10, ra
+; RV32IM-NEXT:    xor s5, s9, s5
+; RV32IM-NEXT:    xor s2, s6, s2
+; RV32IM-NEXT:    xor t2, t6, t2
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s10, s5
+; RV32IM-NEXT:    lw a3, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, s2, a3
+; RV32IM-NEXT:    xor t1, t2, s7
+; RV32IM-NEXT:    xor a7, a7, t5
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, t1, a3
+; RV32IM-NEXT:    xor a4, a7, s8
+; RV32IM-NEXT:    xor a2, a2, t3
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a5, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    lw a5, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    lw a5, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a2, a2, s4
+; RV32IM-NEXT:    xor a0, a0, t4
+; RV32IM-NEXT:    lw a5, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    lui a5, 349525
+; RV32IM-NEXT:    addi a5, a5, 1364
+; RV32IM-NEXT:    xor a3, a1, a3
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a3, a3, a4
+; RV32IM-NEXT:    lw a4, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a3, a2, a6
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, a6
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, s1
+; RV32IM-NEXT:    and a1, a1, s1
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, s3
+; RV32IM-NEXT:    and a1, a1, s3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a5
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulh_i4_bitreverse:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -496
+; RV64IM-NEXT:    sd ra, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli a5, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli ra, a0, 40
+; RV64IM-NEXT:    lui s11, 16
+; RV64IM-NEXT:    srli t0, a0, 56
+; RV64IM-NEXT:    srliw t2, a0, 24
+; RV64IM-NEXT:    slli a6, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t4, 209715
+; RV64IM-NEXT:    lui s8, 349525
+; RV64IM-NEXT:    srli s3, a1, 24
+; RV64IM-NEXT:    srli t6, a1, 8
+; RV64IM-NEXT:    srli a7, a1, 40
+; RV64IM-NEXT:    srli t5, a1, 56
+; RV64IM-NEXT:    srliw s7, a1, 24
+; RV64IM-NEXT:    slli a4, a1, 56
+; RV64IM-NEXT:    li t1, 1
+; RV64IM-NEXT:    lui s1, 256
+; RV64IM-NEXT:    lui s2, 4096
+; RV64IM-NEXT:    lui s0, 8192
+; RV64IM-NEXT:    lui s9, 4080
+; RV64IM-NEXT:    and a2, a3, s9
+; RV64IM-NEXT:    slli s5, s4, 24
+; RV64IM-NEXT:    addi s10, s11, -256
+; RV64IM-NEXT:    and a3, a5, s5
+; RV64IM-NEXT:    sd s5, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a3, a0, s9
+; RV64IM-NEXT:    slli t2, t2, 32
+; RV64IM-NEXT:    addi s4, t3, -241
+; RV64IM-NEXT:    addi s6, t4, 819
+; RV64IM-NEXT:    addi s8, s8, 1365
+; RV64IM-NEXT:    and a5, s3, s9
+; RV64IM-NEXT:    and t3, ra, s10
+; RV64IM-NEXT:    or t0, t3, t0
+; RV64IM-NEXT:    and t3, a1, s9
+; RV64IM-NEXT:    slli t4, s7, 32
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    or s3, a3, t2
+; RV64IM-NEXT:    slli a3, s4, 32
+; RV64IM-NEXT:    add s4, s4, a3
+; RV64IM-NEXT:    slli a3, s6, 32
+; RV64IM-NEXT:    add s6, s6, a3
+; RV64IM-NEXT:    slli a3, s8, 32
+; RV64IM-NEXT:    add s8, s8, a3
+; RV64IM-NEXT:    slli s7, t1, 11
+; RV64IM-NEXT:    and a3, t6, s5
+; RV64IM-NEXT:    or a3, a3, a5
+; RV64IM-NEXT:    slli t2, t1, 32
+; RV64IM-NEXT:    and a5, a7, s10
+; RV64IM-NEXT:    or a5, a5, t5
+; RV64IM-NEXT:    slli ra, t1, 33
+; RV64IM-NEXT:    slli t3, t3, 24
+; RV64IM-NEXT:    or a7, t3, t4
+; RV64IM-NEXT:    slli t3, t1, 34
+; RV64IM-NEXT:    sd t3, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a2, t0
+; RV64IM-NEXT:    slli t0, t1, 35
+; RV64IM-NEXT:    sd t0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s10
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a6, a0
+; RV64IM-NEXT:    slli a6, t1, 36
+; RV64IM-NEXT:    or a3, a3, a5
+; RV64IM-NEXT:    slli a5, t1, 37
+; RV64IM-NEXT:    sd a5, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, a1, s10
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a4, a1
+; RV64IM-NEXT:    or a0, a0, s3
+; RV64IM-NEXT:    or a1, a1, a7
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a3
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s4, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s6, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    sd s8, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s8
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s8
+; RV64IM-NEXT:    and a2, a2, s8
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s8
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s5, a3, a1
+; RV64IM-NEXT:    andi a1, s5, 2
+; RV64IM-NEXT:    andi a2, s5, 1
+; RV64IM-NEXT:    andi a3, s5, 4
+; RV64IM-NEXT:    andi a4, s5, 8
+; RV64IM-NEXT:    andi a5, s5, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s5, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s3, t1, 38
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 40
+; RV64IM-NEXT:    lui a2, 128
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    and a3, s5, s1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 41
+; RV64IM-NEXT:    and a3, s5, s2
+; RV64IM-NEXT:    and a4, s5, s0
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t1, 48
+; RV64IM-NEXT:    and a4, s5, t2
+; RV64IM-NEXT:    and a5, s5, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t1, 49
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 56
+; RV64IM-NEXT:    and a2, s5, a3
+; RV64IM-NEXT:    and a3, s5, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 57
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 39
+; RV64IM-NEXT:    slli ra, t1, 42
+; RV64IM-NEXT:    slli a4, t1, 43
+; RV64IM-NEXT:    slli a5, t1, 44
+; RV64IM-NEXT:    slli s0, t1, 45
+; RV64IM-NEXT:    slli s1, t1, 46
+; RV64IM-NEXT:    slli s2, t1, 47
+; RV64IM-NEXT:    slli s4, t1, 50
+; RV64IM-NEXT:    slli s6, t1, 51
+; RV64IM-NEXT:    slli a1, t1, 52
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 59
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 60
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 61
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, t1, 62
+; RV64IM-NEXT:    sd t1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, s5, s7
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s11
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 32
+; RV64IM-NEXT:    and a1, s5, s8
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s5, s11
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s5, s10
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 1024
+; RV64IM-NEXT:    and a1, s5, s9
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t0, 2048
+; RV64IM-NEXT:    and a1, s5, t0
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 16384
+; RV64IM-NEXT:    and a1, s5, t2
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t3, 32768
+; RV64IM-NEXT:    and t3, s5, t3
+; RV64IM-NEXT:    lui t4, 65536
+; RV64IM-NEXT:    and a1, s5, t4
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t5, 131072
+; RV64IM-NEXT:    and a7, s5, t5
+; RV64IM-NEXT:    lui t6, 262144
+; RV64IM-NEXT:    and t6, s5, t6
+; RV64IM-NEXT:    ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s11, s5, a1
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a6
+; RV64IM-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s3
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a2
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s5, ra
+; RV64IM-NEXT:    and a1, s5, a4
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a5
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s0
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s1
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s2
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s4
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s5, s6
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, s5, a1
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s5, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s5, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s5, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, s5, a1
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s5, a1
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s5, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s5, a1
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s5, a1
+; RV64IM-NEXT:    andi a1, s5, 64
+; RV64IM-NEXT:    andi a2, s5, 128
+; RV64IM-NEXT:    andi a3, s5, 1024
+; RV64IM-NEXT:    srliw a4, s5, 31
+; RV64IM-NEXT:    srli s5, s5, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t2, a0, a3
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a5, a0, a1
+; RV64IM-NEXT:    mul t3, a0, t3
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a7
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t6
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a2, a0, s11
+; RV64IM-NEXT:    ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul ra, a0, ra
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, a0, a1
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s5, s5, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s5
+; RV64IM-NEXT:    sd a0, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld s5, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, s5, a0
+; RV64IM-NEXT:    ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, a0, t4
+; RV64IM-NEXT:    ld a0, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, a0, t2
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, a0, t0
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a0, a6
+; RV64IM-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a0, a5
+; RV64IM-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a0, a2
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor ra, a0, ra
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, a0, s6
+; RV64IM-NEXT:    xor t4, s5, t4
+; RV64IM-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, a0
+; RV64IM-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t0, a0
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a5, a5, t3
+; RV64IM-NEXT:    xor a2, a2, a7
+; RV64IM-NEXT:    xor a3, ra, a3
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a7, s6, s7
+; RV64IM-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t4, a0
+; RV64IM-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, t2, a0
+; RV64IM-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t0, a0
+; RV64IM-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a0
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a3, a3, t1
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a7, a7, s8
+; RV64IM-NEXT:    ld a0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t0, a0
+; RV64IM-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a0
+; RV64IM-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a3, a3, t6
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a7, a7, s9
+; RV64IM-NEXT:    xor t1, t3, t2
+; RV64IM-NEXT:    xor t0, t1, t0
+; RV64IM-NEXT:    ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a0
+; RV64IM-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a7, a7, s10
+; RV64IM-NEXT:    xor a6, t0, a6
+; RV64IM-NEXT:    xor a4, a5, a4
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a1, a1, s4
+; RV64IM-NEXT:    lui a5, %hi(.LCPI15_0)
+; RV64IM-NEXT:    ld a5, %lo(.LCPI15_0)(a5)
+; RV64IM-NEXT:    slli t3, t3, 56
+; RV64IM-NEXT:    ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a7, a0
+; RV64IM-NEXT:    ld t1, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a6, t1
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a2, a4, a2
+; RV64IM-NEXT:    or a4, t3, a7
+; RV64IM-NEXT:    lui t0, 4080
+; RV64IM-NEXT:    and a6, a2, t0
+; RV64IM-NEXT:    xor a3, a2, a3
+; RV64IM-NEXT:    srli a2, a2, 8
+; RV64IM-NEXT:    slli a6, a6, 24
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    ld a7, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a2, a2, a7
+; RV64IM-NEXT:    srli a3, a3, 24
+; RV64IM-NEXT:    srliw a7, a1, 24
+; RV64IM-NEXT:    and a3, a3, t0
+; RV64IM-NEXT:    srli t0, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a7, a7, 32
+; RV64IM-NEXT:    or a2, a2, a3
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a6, a7
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a0, a0, 1
+; RV64IM-NEXT:    ld ra, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 496
+; RV64IM-NEXT:    ret
+  %a.rev = call i4 @llvm.bitreverse.i4(i4 %a)
+  %b.rev = call i4 @llvm.bitreverse.i4(i4 %b)
+  %clmul = call i4 @llvm.clmul.i4(i4 %a.rev, i4 %b.rev)
+  %clmul.rev = call i4 @llvm.bitreverse.i4(i4 %clmul)
+  %res = lshr i4 %clmul.rev, 1
+  ret i4 %res
+}
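+
+; Hand-worked sanity check for the test above (a sketch): it reaches the same
+; quantity through the bit-reversal identity
+;   clmulh(a, b) == lshr(bitreverse(clmul(bitreverse(a), bitreverse(b))), 1)
+; With the sample values 0b1011 and 0b1101, bitreverse gives 0b1101 and
+; 0b1011, clmul.i4 of those is 0b1111, and reversing then shifting right by 1
+; again yields 0b0111, matching the widened computation.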
+
+define i8 @clmulh_i8(i8 %a, i8 %b) nounwind {
+; RV32IM-LABEL: clmulh_i8:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui t2, 61681
+; RV32IM-NEXT:    lui t3, 209715
+; RV32IM-NEXT:    lui a7, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t5, a1, 24
+; RV32IM-NEXT:    slli a4, a1, 24
+; RV32IM-NEXT:    li t6, 1
+; RV32IM-NEXT:    lui s0, 4
+; RV32IM-NEXT:    lui s1, 8
+; RV32IM-NEXT:    lui s2, 32
+; RV32IM-NEXT:    lui s3, 64
+; RV32IM-NEXT:    lui s5, 128
+; RV32IM-NEXT:    lui s6, 256
+; RV32IM-NEXT:    lui s7, 512
+; RV32IM-NEXT:    lui s8, 1024
+; RV32IM-NEXT:    lui s9, 2048
+; RV32IM-NEXT:    lui s10, 4096
+; RV32IM-NEXT:    lui s11, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi s4, a3, -256
+; RV32IM-NEXT:    lui a5, 16
+; RV32IM-NEXT:    and t0, t0, s4
+; RV32IM-NEXT:    or a3, t0, t1
+; RV32IM-NEXT:    lui t0, 32768
+; RV32IM-NEXT:    and t1, t4, s4
+; RV32IM-NEXT:    or t4, t1, t5
+; RV32IM-NEXT:    lui a6, 65536
+; RV32IM-NEXT:    and a0, a0, s4
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or t5, a2, a0
+; RV32IM-NEXT:    lui a2, 131072
+; RV32IM-NEXT:    and a1, a1, s4
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a4, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    addi t2, t2, -241
+; RV32IM-NEXT:    addi t3, t3, 819
+; RV32IM-NEXT:    addi a7, a7, 1365
+; RV32IM-NEXT:    or a3, t5, a3
+; RV32IM-NEXT:    or a0, a0, t4
+; RV32IM-NEXT:    srli t4, a3, 4
+; RV32IM-NEXT:    and a3, a3, t2
+; RV32IM-NEXT:    srli t5, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and t4, t4, t2
+; RV32IM-NEXT:    slli a3, a3, 4
+; RV32IM-NEXT:    and t5, t5, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 2
+; RV32IM-NEXT:    and a3, a3, t3
+; RV32IM-NEXT:    srli t5, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and t4, t4, t3
+; RV32IM-NEXT:    slli a3, a3, 2
+; RV32IM-NEXT:    and t5, t5, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t4, a3, 1
+; RV32IM-NEXT:    and a3, a3, a7
+; RV32IM-NEXT:    srli t5, a0, 1
+; RV32IM-NEXT:    and a0, a0, a7
+; RV32IM-NEXT:    and t4, t4, a7
+; RV32IM-NEXT:    and a7, t5, a7
+; RV32IM-NEXT:    lui a4, 524288
+; RV32IM-NEXT:    slli t6, t6, 11
+; RV32IM-NEXT:    slli a3, a3, 1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a3, t4, a3
+; RV32IM-NEXT:    or a0, a7, a0
+; RV32IM-NEXT:    andi t5, a0, 2
+; RV32IM-NEXT:    andi t4, a0, 1
+; RV32IM-NEXT:    and t6, a0, t6
+; RV32IM-NEXT:    lui a7, 1
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lui a7, 2
+; RV32IM-NEXT:    and a7, a0, a7
+; RV32IM-NEXT:    sw a7, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    sw s0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s2, a0, s2
+; RV32IM-NEXT:    and a5, a0, s3
+; RV32IM-NEXT:    sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s5
+; RV32IM-NEXT:    sw a5, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s6
+; RV32IM-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s7, a0, s7
+; RV32IM-NEXT:    and s8, a0, s8
+; RV32IM-NEXT:    and a5, a0, s9
+; RV32IM-NEXT:    sw a5, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s10
+; RV32IM-NEXT:    sw a5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, s11
+; RV32IM-NEXT:    sw a5, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, ra
+; RV32IM-NEXT:    sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, t0
+; RV32IM-NEXT:    sw a5, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a6
+; RV32IM-NEXT:    sw a5, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, a4
+; RV32IM-NEXT:    sw a4, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 4
+; RV32IM-NEXT:    andi a2, a0, 8
+; RV32IM-NEXT:    andi a4, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a6, a0, 64
+; RV32IM-NEXT:    andi a7, a0, 128
+; RV32IM-NEXT:    andi t0, a0, 256
+; RV32IM-NEXT:    andi t1, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul t5, a3, t5
+; RV32IM-NEXT:    sw t5, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s10, a3, t4
+; RV32IM-NEXT:    mul a1, a3, a1
+; RV32IM-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s6, a3, a2
+; RV32IM-NEXT:    mul s5, a3, a4
+; RV32IM-NEXT:    mul s3, a3, a5
+; RV32IM-NEXT:    mul a1, a3, a6
+; RV32IM-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a3, a7
+; RV32IM-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul s0, a3, t0
+; RV32IM-NEXT:    mul t5, a3, t1
+; RV32IM-NEXT:    mul s11, a3, a0
+; RV32IM-NEXT:    mul a0, a3, t6
+; RV32IM-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a3, a0
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a3, a0
+; RV32IM-NEXT:    mul s1, a3, s1
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, a3, a0
+; RV32IM-NEXT:    mul a0, a3, s2
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a3, a0
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a3, a0
+; RV32IM-NEXT:    mul a6, a3, s7
+; RV32IM-NEXT:    mul t4, a3, s8
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s7, a3, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a3, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a3, a0
+; RV32IM-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a3, a5
+; RV32IM-NEXT:    lw t0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a3, t0
+; RV32IM-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a3, t6
+; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s2, a3, s2
+; RV32IM-NEXT:    lw s8, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a3, s8
+; RV32IM-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a3, s9
+; RV32IM-NEXT:    lw s9, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s10, s10, s9
+; RV32IM-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor s6, s9, s6
+; RV32IM-NEXT:    xor s3, s5, s3
+; RV32IM-NEXT:    xor t5, s0, t5
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s10, s6
+; RV32IM-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, s3, a4
+; RV32IM-NEXT:    xor t1, t5, s11
+; RV32IM-NEXT:    xor a7, a7, s1
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a4
+; RV32IM-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, t1, a4
+; RV32IM-NEXT:    xor a5, a7, ra
+; RV32IM-NEXT:    xor a2, a2, t4
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a6
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s7
+; RV32IM-NEXT:    xor a0, a0, t6
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s2
+; RV32IM-NEXT:    xor a4, a1, a4
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a0, a0, s8
+; RV32IM-NEXT:    xor a2, a4, a2
+; RV32IM-NEXT:    xor a0, a0, a3
+; RV32IM-NEXT:    and a3, a2, s4
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, s4
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, t2
+; RV32IM-NEXT:    and a1, a1, t2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, t3
+; RV32IM-NEXT:    and a1, a1, t3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    andi a1, a0, 85
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    andi a0, a0, 340
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulh_i8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -480
+; RV64IM-NEXT:    sd ra, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli a7, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a4, a0, 40
+; RV64IM-NEXT:    lui s10, 16
+; RV64IM-NEXT:    srli t1, a0, 56
+; RV64IM-NEXT:    srliw t4, a0, 24
+; RV64IM-NEXT:    slli a5, a0, 56
+; RV64IM-NEXT:    lui s3, 61681
+; RV64IM-NEXT:    lui t5, 209715
+; RV64IM-NEXT:    lui s6, 349525
+; RV64IM-NEXT:    srli s9, a1, 24
+; RV64IM-NEXT:    srli s0, a1, 8
+; RV64IM-NEXT:    srli ra, a1, 40
+; RV64IM-NEXT:    srli t2, a1, 56
+; RV64IM-NEXT:    srliw s11, a1, 24
+; RV64IM-NEXT:    slli a6, a1, 56
+; RV64IM-NEXT:    li t0, 1
+; RV64IM-NEXT:    lui s1, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui t6, 4096
+; RV64IM-NEXT:    lui s5, 8192
+; RV64IM-NEXT:    lui s7, 4080
+; RV64IM-NEXT:    and a2, a3, s7
+; RV64IM-NEXT:    slli t3, s4, 24
+; RV64IM-NEXT:    addi s8, s10, -256
+; RV64IM-NEXT:    and a3, a7, t3
+; RV64IM-NEXT:    sd t3, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a3, a0, s7
+; RV64IM-NEXT:    slli t4, t4, 32
+; RV64IM-NEXT:    addi s3, s3, -241
+; RV64IM-NEXT:    addi s4, t5, 819
+; RV64IM-NEXT:    addi s6, s6, 1365
+; RV64IM-NEXT:    and a7, s9, s7
+; RV64IM-NEXT:    and a4, a4, s8
+; RV64IM-NEXT:    or a4, a4, t1
+; RV64IM-NEXT:    and t1, a1, s7
+; RV64IM-NEXT:    slli t5, s11, 32
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    or s9, a3, t4
+; RV64IM-NEXT:    slli a3, s3, 32
+; RV64IM-NEXT:    add s3, s3, a3
+; RV64IM-NEXT:    slli a3, s4, 32
+; RV64IM-NEXT:    add s4, s4, a3
+; RV64IM-NEXT:    slli a3, s6, 32
+; RV64IM-NEXT:    add s6, s6, a3
+; RV64IM-NEXT:    slli t4, t0, 11
+; RV64IM-NEXT:    and a3, s0, t3
+; RV64IM-NEXT:    or a3, a3, a7
+; RV64IM-NEXT:    slli s11, t0, 32
+; RV64IM-NEXT:    and a7, ra, s8
+; RV64IM-NEXT:    or a7, a7, t2
+; RV64IM-NEXT:    slli ra, t0, 33
+; RV64IM-NEXT:    slli t1, t1, 24
+; RV64IM-NEXT:    or t1, t1, t5
+; RV64IM-NEXT:    slli s0, t0, 34
+; RV64IM-NEXT:    or a2, a2, a4
+; RV64IM-NEXT:    slli a4, t0, 35
+; RV64IM-NEXT:    sd a4, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s8
+; RV64IM-NEXT:    sd s8, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a5, a0
+; RV64IM-NEXT:    slli a4, t0, 36
+; RV64IM-NEXT:    sd a4, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a3, a3, a7
+; RV64IM-NEXT:    slli a7, t0, 37
+; RV64IM-NEXT:    and a1, a1, s8
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a6, a1
+; RV64IM-NEXT:    slli a6, t0, 38
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, t1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a3
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s3
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s3
+; RV64IM-NEXT:    and a2, a2, s3
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s3
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s6, a3, a1
+; RV64IM-NEXT:    andi a1, s6, 2
+; RV64IM-NEXT:    andi a2, s6, 1
+; RV64IM-NEXT:    andi a3, s6, 4
+; RV64IM-NEXT:    andi a4, s6, 8
+; RV64IM-NEXT:    andi a5, s6, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s6, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s3, t0, 39
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 40
+; RV64IM-NEXT:    and a2, s6, s1
+; RV64IM-NEXT:    and a3, s6, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 41
+; RV64IM-NEXT:    and a3, s6, t6
+; RV64IM-NEXT:    and a4, s6, s5
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t0, 48
+; RV64IM-NEXT:    and a4, s6, s11
+; RV64IM-NEXT:    and a5, s6, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t0, 49
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 56
+; RV64IM-NEXT:    and a2, s6, a3
+; RV64IM-NEXT:    and a3, s6, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 57
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 42
+; RV64IM-NEXT:    slli ra, t0, 43
+; RV64IM-NEXT:    slli a4, t0, 44
+; RV64IM-NEXT:    slli t6, t0, 45
+; RV64IM-NEXT:    slli s1, t0, 46
+; RV64IM-NEXT:    slli s2, t0, 47
+; RV64IM-NEXT:    slli s4, t0, 50
+; RV64IM-NEXT:    slli s5, t0, 51
+; RV64IM-NEXT:    slli a1, t0, 52
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 59
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 60
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 61
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t0, t0, 62
+; RV64IM-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t4
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s6, s9
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s6, s11
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 2048
+; RV64IM-NEXT:    and a1, s6, s7
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t1, 16384
+; RV64IM-NEXT:    and a1, s6, t1
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 32768
+; RV64IM-NEXT:    and t2, s6, t2
+; RV64IM-NEXT:    lui t3, 65536
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 131072
+; RV64IM-NEXT:    and a5, s6, t4
+; RV64IM-NEXT:    lui t5, 262144
+; RV64IM-NEXT:    and t0, s6, t5
+; RV64IM-NEXT:    and s11, s6, s0
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a7
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a6
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s3
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a2
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s6, ra
+; RV64IM-NEXT:    and a1, s6, a4
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t6
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s4
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s6, s5
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, s6, a1
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s6, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s6, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s6, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, s6, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s6, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s6, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s6, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s6, a1
+; RV64IM-NEXT:    andi a1, s6, 64
+; RV64IM-NEXT:    andi a2, s6, 128
+; RV64IM-NEXT:    andi a3, s6, 1024
+; RV64IM-NEXT:    srliw a4, s6, 31
+; RV64IM-NEXT:    srli s6, s6, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t3, a0, a3
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    mul t5, a0, t2
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a3, a0, s11
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    mul a5, a0, ra
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s6, s6, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s6
+; RV64IM-NEXT:    sd a0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s6, a0
+; RV64IM-NEXT:    ld a0, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, a0, t4
+; RV64IM-NEXT:    ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, a0, t3
+; RV64IM-NEXT:    ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, a0, t1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a0, a7
+; RV64IM-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a0, a6
+; RV64IM-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a0, a3
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a0, a2
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, a0, s5
+; RV64IM-NEXT:    xor t4, s6, t4
+; RV64IM-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    xor a7, a7, t6
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a3, a3, t0
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a5, s5, s7
+; RV64IM-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t4, a0
+; RV64IM-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a5, a5, s8
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a5, a5, s9
+; RV64IM-NEXT:    xor t2, t0, t3
+; RV64IM-NEXT:    xor t1, t2, t1
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, ra
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a5, a5, s10
+; RV64IM-NEXT:    xor a7, t1, a7
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a1, a1, s4
+; RV64IM-NEXT:    lui a6, %hi(.LCPI16_0)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI16_0)(a6)
+; RV64IM-NEXT:    slli t0, t0, 56
+; RV64IM-NEXT:    ld a0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a5, a0
+; RV64IM-NEXT:    ld t1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a5, a7, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    slli a5, a5, 40
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    or a4, t0, a5
+; RV64IM-NEXT:    lui t0, 4080
+; RV64IM-NEXT:    and a5, a3, t0
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    srli a3, a3, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a7, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, a3, a7
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a7, a1, 24
+; RV64IM-NEXT:    and a2, a2, t0
+; RV64IM-NEXT:    srli t0, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a7, a7, 32
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a5, a7
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    andi a0, a0, 85
+; RV64IM-NEXT:    and a1, a1, a6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    slli a0, a0, 55
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    ld ra, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 480
+; RV64IM-NEXT:    ret
+  %a.ext = zext i8 %a to i16
+  %b.ext = zext i8 %b to i16
+  %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext)
+  %res.ext = lshr i16 %clmul, 8
+  %res = trunc i16 %res.ext to i8
+  ret i8 %res
+}
+
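+; clmulh_i16: the analogous high-half check at i16 width, covering both the
+; RV32IM and RV64IM expansions.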
+define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
+; RV32IM-LABEL: clmulh_i16:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui ra, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui s10, 61681
+; RV32IM-NEXT:    lui t2, 209715
+; RV32IM-NEXT:    lui a4, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t5, a1, 24
+; RV32IM-NEXT:    slli a5, a1, 24
+; RV32IM-NEXT:    li t6, 1
+; RV32IM-NEXT:    lui a7, 2
+; RV32IM-NEXT:    lui a6, 4
+; RV32IM-NEXT:    lui s2, 8
+; RV32IM-NEXT:    lui s0, 32
+; RV32IM-NEXT:    lui s1, 64
+; RV32IM-NEXT:    lui t3, 128
+; RV32IM-NEXT:    lui s3, 256
+; RV32IM-NEXT:    lui s4, 512
+; RV32IM-NEXT:    lui s6, 1024
+; RV32IM-NEXT:    lui s7, 2048
+; RV32IM-NEXT:    lui s8, 4096
+; RV32IM-NEXT:    lui s9, 8192
+; RV32IM-NEXT:    lui s11, 16384
+; RV32IM-NEXT:    addi s5, ra, -256
+; RV32IM-NEXT:    sw s5, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and t0, t0, s5
+; RV32IM-NEXT:    or t1, t0, t1
+; RV32IM-NEXT:    lui a3, 32768
+; RV32IM-NEXT:    and t4, t4, s5
+; RV32IM-NEXT:    or t5, t4, t5
+; RV32IM-NEXT:    lui t0, 65536
+; RV32IM-NEXT:    and a0, a0, s5
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a2, a2, a0
+; RV32IM-NEXT:    lui t4, 131072
+; RV32IM-NEXT:    and a1, a1, s5
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a5, a1
+; RV32IM-NEXT:    lui a5, 262144
+; RV32IM-NEXT:    addi s5, s10, -241
+; RV32IM-NEXT:    addi s10, t2, 819
+; RV32IM-NEXT:    addi a4, a4, 1365
+; RV32IM-NEXT:    or a2, a2, t1
+; RV32IM-NEXT:    or a0, a0, t5
+; RV32IM-NEXT:    srli t1, a2, 4
+; RV32IM-NEXT:    and a2, a2, s5
+; RV32IM-NEXT:    srli t5, a0, 4
+; RV32IM-NEXT:    and a0, a0, s5
+; RV32IM-NEXT:    and t1, t1, s5
+; RV32IM-NEXT:    slli a2, a2, 4
+; RV32IM-NEXT:    and t5, t5, s5
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a2, t1, a2
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t1, a2, 2
+; RV32IM-NEXT:    and a2, a2, s10
+; RV32IM-NEXT:    srli t5, a0, 2
+; RV32IM-NEXT:    and a0, a0, s10
+; RV32IM-NEXT:    and t1, t1, s10
+; RV32IM-NEXT:    slli a2, a2, 2
+; RV32IM-NEXT:    and t5, t5, s10
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a2, t1, a2
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    srli t1, a2, 1
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    srli t5, a0, 1
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and t1, t1, a4
+; RV32IM-NEXT:    and t5, t5, a4
+; RV32IM-NEXT:    lui a1, 524288
+; RV32IM-NEXT:    slli t6, t6, 11
+; RV32IM-NEXT:    slli a2, a2, 1
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a4, t1, a2
+; RV32IM-NEXT:    or a0, t5, a0
+; RV32IM-NEXT:    andi t2, a0, 2
+; RV32IM-NEXT:    andi t5, a0, 1
+; RV32IM-NEXT:    and t6, a0, t6
+; RV32IM-NEXT:    lui a2, 1
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a7
+; RV32IM-NEXT:    sw a2, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a6
+; RV32IM-NEXT:    sw a2, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s2
+; RV32IM-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and ra, a0, ra
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    and s1, a0, s1
+; RV32IM-NEXT:    sw s1, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, t3
+; RV32IM-NEXT:    sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s3
+; RV32IM-NEXT:    sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s4, a0, s4
+; RV32IM-NEXT:    and a2, a0, s6
+; RV32IM-NEXT:    sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s7
+; RV32IM-NEXT:    sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s8
+; RV32IM-NEXT:    sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s9
+; RV32IM-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, s11
+; RV32IM-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a3
+; RV32IM-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, t0
+; RV32IM-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, t4
+; RV32IM-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi a1, a0, 4
+; RV32IM-NEXT:    andi a2, a0, 8
+; RV32IM-NEXT:    andi a3, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a6, a0, 64
+; RV32IM-NEXT:    andi a7, a0, 128
+; RV32IM-NEXT:    andi t0, a0, 256
+; RV32IM-NEXT:    andi t1, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul s11, a4, t2
+; RV32IM-NEXT:    mul s7, a4, t5
+; RV32IM-NEXT:    mul s8, a4, a1
+; RV32IM-NEXT:    mul s3, a4, a2
+; RV32IM-NEXT:    mul s2, a4, a3
+; RV32IM-NEXT:    mul s1, a4, a5
+; RV32IM-NEXT:    mul a1, a4, a6
+; RV32IM-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a4, a7
+; RV32IM-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t5, a4, t0
+; RV32IM-NEXT:    mul t3, a4, t1
+; RV32IM-NEXT:    mul s9, a4, a0
+; RV32IM-NEXT:    mul a0, a4, t6
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t1, a4, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a4, a0
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t6, a4, a0
+; RV32IM-NEXT:    mul s6, a4, ra
+; RV32IM-NEXT:    mul a0, a4, s0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a4, a0
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a2, a4, a0
+; RV32IM-NEXT:    mul a6, a4, s4
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t2, a4, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a4, a0
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a4, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a4, a0
+; RV32IM-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a4, a5
+; RV32IM-NEXT:    lw t0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a4, t0
+; RV32IM-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a4, t4
+; RV32IM-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a4, s0
+; RV32IM-NEXT:    lw ra, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul ra, a4, ra
+; RV32IM-NEXT:    sw ra, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw ra, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a4, ra
+; RV32IM-NEXT:    xor s7, s7, s11
+; RV32IM-NEXT:    xor s3, s8, s3
+; RV32IM-NEXT:    xor s1, s2, s1
+; RV32IM-NEXT:    xor t3, t5, t3
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s7, s3
+; RV32IM-NEXT:    lw a3, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, s1, a3
+; RV32IM-NEXT:    xor t1, t3, s9
+; RV32IM-NEXT:    xor a7, a7, t6
+; RV32IM-NEXT:    xor a2, a2, a6
+; RV32IM-NEXT:    xor a0, a0, a5
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, t1, a3
+; RV32IM-NEXT:    xor a5, a7, s6
+; RV32IM-NEXT:    xor a2, a2, t2
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a6
+; RV32IM-NEXT:    lw a6, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a6
+; RV32IM-NEXT:    lw a6, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a2, a2, s4
+; RV32IM-NEXT:    xor a0, a0, t4
+; RV32IM-NEXT:    lw a6, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a5, a5, a6
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    xor a3, a1, a3
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    lui a5, 21
+; RV32IM-NEXT:    lw a6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lui a6, 5
+; RV32IM-NEXT:    addi a5, a5, 1364
+; RV32IM-NEXT:    addi a6, a6, 1365
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    lw a7, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a3, a2, a7
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, a7
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, s5
+; RV32IM-NEXT:    and a1, a1, s5
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, s10
+; RV32IM-NEXT:    and a1, a1, s10
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    and a0, a0, a6
+; RV32IM-NEXT:    and a1, a1, a5
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulh_i16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -480
+; RV64IM-NEXT:    sd ra, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli a6, a0, 8
+; RV64IM-NEXT:    li s4, 255
+; RV64IM-NEXT:    srli a4, a0, 40
+; RV64IM-NEXT:    lui s3, 16
+; RV64IM-NEXT:    srli t1, a0, 56
+; RV64IM-NEXT:    srliw t4, a0, 24
+; RV64IM-NEXT:    slli a7, a0, 56
+; RV64IM-NEXT:    lui t3, 61681
+; RV64IM-NEXT:    lui t5, 209715
+; RV64IM-NEXT:    lui s6, 349525
+; RV64IM-NEXT:    srli s9, a1, 24
+; RV64IM-NEXT:    srli s0, a1, 8
+; RV64IM-NEXT:    srli ra, a1, 40
+; RV64IM-NEXT:    srli t2, a1, 56
+; RV64IM-NEXT:    srliw s11, a1, 24
+; RV64IM-NEXT:    slli a5, a1, 56
+; RV64IM-NEXT:    li t0, 1
+; RV64IM-NEXT:    lui s1, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui t6, 4096
+; RV64IM-NEXT:    lui s5, 8192
+; RV64IM-NEXT:    lui s7, 4080
+; RV64IM-NEXT:    and a2, a3, s7
+; RV64IM-NEXT:    slli s10, s4, 24
+; RV64IM-NEXT:    addi s8, s3, -256
+; RV64IM-NEXT:    and a3, a6, s10
+; RV64IM-NEXT:    sd s10, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a3, a0, s7
+; RV64IM-NEXT:    slli t4, t4, 32
+; RV64IM-NEXT:    addi s3, t3, -241
+; RV64IM-NEXT:    addi s4, t5, 819
+; RV64IM-NEXT:    addi s6, s6, 1365
+; RV64IM-NEXT:    and a6, s9, s7
+; RV64IM-NEXT:    and a4, a4, s8
+; RV64IM-NEXT:    or a4, a4, t1
+; RV64IM-NEXT:    and t1, a1, s7
+; RV64IM-NEXT:    slli t3, s11, 32
+; RV64IM-NEXT:    slli a3, a3, 24
+; RV64IM-NEXT:    or s9, a3, t4
+; RV64IM-NEXT:    slli a3, s3, 32
+; RV64IM-NEXT:    add s3, s3, a3
+; RV64IM-NEXT:    slli a3, s4, 32
+; RV64IM-NEXT:    add s4, s4, a3
+; RV64IM-NEXT:    slli a3, s6, 32
+; RV64IM-NEXT:    add s6, s6, a3
+; RV64IM-NEXT:    slli t4, t0, 11
+; RV64IM-NEXT:    and a3, s0, s10
+; RV64IM-NEXT:    or a3, a3, a6
+; RV64IM-NEXT:    slli s11, t0, 32
+; RV64IM-NEXT:    and a6, ra, s8
+; RV64IM-NEXT:    or a6, a6, t2
+; RV64IM-NEXT:    slli ra, t0, 33
+; RV64IM-NEXT:    slli t1, t1, 24
+; RV64IM-NEXT:    or t1, t1, t3
+; RV64IM-NEXT:    slli s0, t0, 34
+; RV64IM-NEXT:    or a2, a2, a4
+; RV64IM-NEXT:    slli a4, t0, 35
+; RV64IM-NEXT:    sd a4, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s8
+; RV64IM-NEXT:    sd s8, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a7, a0
+; RV64IM-NEXT:    slli a7, t0, 36
+; RV64IM-NEXT:    or a3, a3, a6
+; RV64IM-NEXT:    slli a6, t0, 37
+; RV64IM-NEXT:    and a1, a1, s8
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a5, a1
+; RV64IM-NEXT:    slli a4, t0, 38
+; RV64IM-NEXT:    sd a4, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, t1
+; RV64IM-NEXT:    or a0, a0, a2
+; RV64IM-NEXT:    or a1, a1, a3
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s3
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s3
+; RV64IM-NEXT:    and a2, a2, s3
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s3
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s6, a3, a1
+; RV64IM-NEXT:    andi a1, s6, 2
+; RV64IM-NEXT:    andi a2, s6, 1
+; RV64IM-NEXT:    andi a3, s6, 4
+; RV64IM-NEXT:    andi a4, s6, 8
+; RV64IM-NEXT:    andi a5, s6, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a4
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s6, 256
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s6, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s3, t0, 39
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 40
+; RV64IM-NEXT:    and a2, s6, s1
+; RV64IM-NEXT:    and a3, s6, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 41
+; RV64IM-NEXT:    and a3, s6, t6
+; RV64IM-NEXT:    and a4, s6, s5
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t0, 48
+; RV64IM-NEXT:    and a4, s6, s11
+; RV64IM-NEXT:    and a5, s6, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t0, 49
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 56
+; RV64IM-NEXT:    and a2, s6, a3
+; RV64IM-NEXT:    and a3, s6, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 57
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    and a2, s6, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t0, 42
+; RV64IM-NEXT:    slli ra, t0, 43
+; RV64IM-NEXT:    slli a4, t0, 44
+; RV64IM-NEXT:    slli t6, t0, 45
+; RV64IM-NEXT:    slli s1, t0, 46
+; RV64IM-NEXT:    slli s2, t0, 47
+; RV64IM-NEXT:    slli s4, t0, 50
+; RV64IM-NEXT:    slli s5, t0, 51
+; RV64IM-NEXT:    slli a1, t0, 52
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 53
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 54
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 55
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 58
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 59
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 60
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t0, 61
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t0, t0, 62
+; RV64IM-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t4
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s6, a3
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a1, 16
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s6, s9
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s6, s11
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s6, s10
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s6, s8
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s7, 2048
+; RV64IM-NEXT:    and a1, s6, s7
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t1, 16384
+; RV64IM-NEXT:    and a1, s6, t1
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t2, 32768
+; RV64IM-NEXT:    and t2, s6, t2
+; RV64IM-NEXT:    lui t3, 65536
+; RV64IM-NEXT:    and a1, s6, t3
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 131072
+; RV64IM-NEXT:    and a5, s6, t4
+; RV64IM-NEXT:    lui t5, 262144
+; RV64IM-NEXT:    and t0, s6, t5
+; RV64IM-NEXT:    and s11, s6, s0
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a7
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a6
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s6, a1
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s3
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, a2
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s6, ra
+; RV64IM-NEXT:    and a1, s6, a4
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, t6
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s2
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s6, s4
+; RV64IM-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and s0, s6, s5
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s1, s6, a1
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s2, s6, a1
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s3, s6, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s6, a1
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s5, s6, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s6, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s6, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s6, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s6, a1
+; RV64IM-NEXT:    andi a1, s6, 64
+; RV64IM-NEXT:    andi a2, s6, 128
+; RV64IM-NEXT:    andi a3, s6, 1024
+; RV64IM-NEXT:    srliw a4, s6, 31
+; RV64IM-NEXT:    srli s6, s6, 63
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul t3, a0, a3
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a7, a0, a1
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    mul t5, a0, t2
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a3, a0, s11
+; RV64IM-NEXT:    ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    mul a5, a0, ra
+; RV64IM-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t2, a0, a1
+; RV64IM-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s11, a0, a1
+; RV64IM-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, a1
+; RV64IM-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul s0, a0, s0
+; RV64IM-NEXT:    mul s1, a0, s1
+; RV64IM-NEXT:    mul s2, a0, s2
+; RV64IM-NEXT:    mul s3, a0, s3
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s5, a0, s5
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli s6, s6, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, s6
+; RV64IM-NEXT:    sd a0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, s6, a0
+; RV64IM-NEXT:    ld a0, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, a0, t4
+; RV64IM-NEXT:    ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, a0, t3
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, a0, t1
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a0, a7
+; RV64IM-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a0, a6
+; RV64IM-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a0, a3
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a0, a2
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s5, a0, s5
+; RV64IM-NEXT:    xor t4, s6, t4
+; RV64IM-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    xor a7, a7, t6
+; RV64IM-NEXT:    xor a6, a6, t5
+; RV64IM-NEXT:    xor a3, a3, t0
+; RV64IM-NEXT:    xor a2, a2, a5
+; RV64IM-NEXT:    xor a1, a1, s0
+; RV64IM-NEXT:    xor a5, s5, s7
+; RV64IM-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t4, a0
+; RV64IM-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, t2
+; RV64IM-NEXT:    xor a1, a1, s1
+; RV64IM-NEXT:    xor a5, a5, s8
+; RV64IM-NEXT:    ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, s11
+; RV64IM-NEXT:    xor a1, a1, s2
+; RV64IM-NEXT:    xor a5, a5, s9
+; RV64IM-NEXT:    xor t2, t0, t3
+; RV64IM-NEXT:    xor t1, t2, t1
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, a6, a0
+; RV64IM-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    xor a2, a2, ra
+; RV64IM-NEXT:    xor a1, a1, s3
+; RV64IM-NEXT:    xor a5, a5, s10
+; RV64IM-NEXT:    xor a7, t1, a7
+; RV64IM-NEXT:    xor a4, a6, a4
+; RV64IM-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a3, a3, a0
+; RV64IM-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a1, a1, s4
+; RV64IM-NEXT:    lui a6, %hi(.LCPI17_0)
+; RV64IM-NEXT:    ld a0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a5, a0
+; RV64IM-NEXT:    lui a5, 5
+; RV64IM-NEXT:    ld a6, %lo(.LCPI17_0)(a6)
+; RV64IM-NEXT:    addi a5, a5, 1365
+; RV64IM-NEXT:    slli t0, t0, 56
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld t2, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a7, a7, t2
+; RV64IM-NEXT:    slli a7, a7, 40
+; RV64IM-NEXT:    xor a3, a4, a3
+; RV64IM-NEXT:    or a4, t0, a7
+; RV64IM-NEXT:    lui t1, 4080
+; RV64IM-NEXT:    and a7, a3, t1
+; RV64IM-NEXT:    xor a2, a3, a2
+; RV64IM-NEXT:    srli a3, a3, 8
+; RV64IM-NEXT:    slli a7, a7, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld t0, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, a3, t0
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw t0, a1, 24
+; RV64IM-NEXT:    and a2, a2, t1
+; RV64IM-NEXT:    srli t1, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli t0, t0, 32
+; RV64IM-NEXT:    or a2, a3, a2
+; RV64IM-NEXT:    and a1, t1, t2
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a3, a7, t0
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a4, a3
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    and a1, a1, a6
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    slli a0, a0, 47
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    ld ra, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 480
+; RV64IM-NEXT:    ret
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext)
+  %res.ext = lshr i32 %clmul, 16
+  %res = trunc i32 %res.ext to i16
+  ret i16 %res
+}
+
+define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
+; RV32IM-LABEL: clmulh_i32:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    addi sp, sp, -144
+; RV32IM-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    srli t0, a0, 8
+; RV32IM-NEXT:    lui a3, 16
+; RV32IM-NEXT:    srli t1, a0, 24
+; RV32IM-NEXT:    slli a2, a0, 24
+; RV32IM-NEXT:    lui s1, 61681
+; RV32IM-NEXT:    lui s3, 209715
+; RV32IM-NEXT:    lui a6, 349525
+; RV32IM-NEXT:    srli t4, a1, 8
+; RV32IM-NEXT:    srli t6, a1, 24
+; RV32IM-NEXT:    slli a4, a1, 24
+; RV32IM-NEXT:    li t3, 1
+; RV32IM-NEXT:    lui s11, 2
+; RV32IM-NEXT:    lui t2, 4
+; RV32IM-NEXT:    lui s10, 8
+; RV32IM-NEXT:    lui t5, 32
+; RV32IM-NEXT:    lui s0, 64
+; RV32IM-NEXT:    lui s2, 128
+; RV32IM-NEXT:    lui s4, 256
+; RV32IM-NEXT:    lui s5, 512
+; RV32IM-NEXT:    lui s6, 1024
+; RV32IM-NEXT:    lui s7, 2048
+; RV32IM-NEXT:    lui s8, 4096
+; RV32IM-NEXT:    lui s9, 8192
+; RV32IM-NEXT:    lui ra, 16384
+; RV32IM-NEXT:    addi a3, a3, -256
+; RV32IM-NEXT:    lui a5, 16
+; RV32IM-NEXT:    and t0, t0, a3
+; RV32IM-NEXT:    or t1, t0, t1
+; RV32IM-NEXT:    lui a7, 32768
+; RV32IM-NEXT:    and t4, t4, a3
+; RV32IM-NEXT:    or t6, t4, t6
+; RV32IM-NEXT:    lui t0, 65536
+; RV32IM-NEXT:    and a0, a0, a3
+; RV32IM-NEXT:    mv t4, a3
+; RV32IM-NEXT:    sw a3, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    slli a0, a0, 8
+; RV32IM-NEXT:    or a2, a2, a0
+; RV32IM-NEXT:    lui a3, 131072
+; RV32IM-NEXT:    and a1, a1, t4
+; RV32IM-NEXT:    slli a1, a1, 8
+; RV32IM-NEXT:    or a0, a4, a1
+; RV32IM-NEXT:    lui a1, 262144
+; RV32IM-NEXT:    addi s1, s1, -241
+; RV32IM-NEXT:    addi s3, s3, 819
+; RV32IM-NEXT:    or a2, a2, t1
+; RV32IM-NEXT:    addi a4, a6, 1365
+; RV32IM-NEXT:    sw a4, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    or a0, a0, t6
+; RV32IM-NEXT:    srli a6, a2, 4
+; RV32IM-NEXT:    and a2, a2, s1
+; RV32IM-NEXT:    and a6, a6, s1
+; RV32IM-NEXT:    slli a2, a2, 4
+; RV32IM-NEXT:    or a2, a6, a2
+; RV32IM-NEXT:    srli a6, a0, 4
+; RV32IM-NEXT:    and a0, a0, s1
+; RV32IM-NEXT:    and a6, a6, s1
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a6, a0
+; RV32IM-NEXT:    srli a6, a2, 2
+; RV32IM-NEXT:    and a2, a2, s3
+; RV32IM-NEXT:    and a6, a6, s3
+; RV32IM-NEXT:    slli a2, a2, 2
+; RV32IM-NEXT:    or a2, a6, a2
+; RV32IM-NEXT:    srli a6, a0, 2
+; RV32IM-NEXT:    and a0, a0, s3
+; RV32IM-NEXT:    and a6, a6, s3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a6, a0
+; RV32IM-NEXT:    srli a6, a2, 1
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    and a6, a6, a4
+; RV32IM-NEXT:    slli a2, a2, 1
+; RV32IM-NEXT:    or a6, a6, a2
+; RV32IM-NEXT:    srli a2, a0, 1
+; RV32IM-NEXT:    and a0, a0, a4
+; RV32IM-NEXT:    and a2, a2, a4
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    lui a2, 524288
+; RV32IM-NEXT:    slli t3, t3, 11
+; RV32IM-NEXT:    and t3, a0, t3
+; RV32IM-NEXT:    lui a4, 1
+; RV32IM-NEXT:    and t4, a0, a4
+; RV32IM-NEXT:    and s11, a0, s11
+; RV32IM-NEXT:    and a4, a0, t2
+; RV32IM-NEXT:    sw a4, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s10
+; RV32IM-NEXT:    sw a4, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a5, a0, a5
+; RV32IM-NEXT:    sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, t5
+; RV32IM-NEXT:    sw a4, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s0, a0, s0
+; RV32IM-NEXT:    and a4, a0, s2
+; RV32IM-NEXT:    sw a4, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and s4, a0, s4
+; RV32IM-NEXT:    and a4, a0, s5
+; RV32IM-NEXT:    sw a4, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s6
+; RV32IM-NEXT:    sw a4, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s7
+; RV32IM-NEXT:    sw a4, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s8
+; RV32IM-NEXT:    sw a4, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, s9
+; RV32IM-NEXT:    sw a4, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, ra
+; RV32IM-NEXT:    sw a4, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, a7
+; RV32IM-NEXT:    sw a4, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a4, a0, t0
+; RV32IM-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a3, a0, a3
+; RV32IM-NEXT:    sw a3, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a1, a0, a1
+; RV32IM-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    and a2, a0, a2
+; RV32IM-NEXT:    sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    andi ra, a0, 2
+; RV32IM-NEXT:    andi a1, a0, 1
+; RV32IM-NEXT:    andi a2, a0, 4
+; RV32IM-NEXT:    andi a3, a0, 8
+; RV32IM-NEXT:    andi a4, a0, 16
+; RV32IM-NEXT:    andi a5, a0, 32
+; RV32IM-NEXT:    andi a7, a0, 64
+; RV32IM-NEXT:    andi t0, a0, 128
+; RV32IM-NEXT:    andi t1, a0, 256
+; RV32IM-NEXT:    andi t2, a0, 512
+; RV32IM-NEXT:    andi a0, a0, 1024
+; RV32IM-NEXT:    mul ra, a6, ra
+; RV32IM-NEXT:    mul s10, a6, a1
+; RV32IM-NEXT:    mul s9, a6, a2
+; RV32IM-NEXT:    mul s5, a6, a3
+; RV32IM-NEXT:    mul s6, a6, a4
+; RV32IM-NEXT:    mul s2, a6, a5
+; RV32IM-NEXT:    mul a1, a6, a7
+; RV32IM-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a1, a6, t0
+; RV32IM-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t6, a6, t1
+; RV32IM-NEXT:    mul t2, a6, t2
+; RV32IM-NEXT:    mul s7, a6, a0
+; RV32IM-NEXT:    mul a0, a6, t3
+; RV32IM-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a6, t4
+; RV32IM-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul t1, a6, s11
+; RV32IM-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a7, a6, a0
+; RV32IM-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t5, a6, a0
+; RV32IM-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s8, a6, a0
+; RV32IM-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a6, a0
+; RV32IM-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    mul a0, a6, s0
+; RV32IM-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a3, a6, a0
+; RV32IM-NEXT:    mul a2, a6, s4
+; RV32IM-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a5, a6, a0
+; RV32IM-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t3, a6, a0
+; RV32IM-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s4, a6, a0
+; RV32IM-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a1, a6, a0
+; RV32IM-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a0, a6, a0
+; RV32IM-NEXT:    lw a4, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a4, a6, a4
+; RV32IM-NEXT:    lw t0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t0, a6, t0
+; RV32IM-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul t4, a6, t4
+; RV32IM-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s0, a6, s0
+; RV32IM-NEXT:    lw s11, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul s11, a6, s11
+; RV32IM-NEXT:    sw s11, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    mul a6, a6, s11
+; RV32IM-NEXT:    xor s10, s10, ra
+; RV32IM-NEXT:    xor s5, s9, s5
+; RV32IM-NEXT:    xor s2, s6, s2
+; RV32IM-NEXT:    xor t2, t6, t2
+; RV32IM-NEXT:    xor a7, t1, a7
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a1, a0
+; RV32IM-NEXT:    xor a1, s10, s5
+; RV32IM-NEXT:    lw a3, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, s2, a3
+; RV32IM-NEXT:    xor t1, t2, s7
+; RV32IM-NEXT:    xor a7, a7, t5
+; RV32IM-NEXT:    xor a2, a2, a5
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    xor a1, a1, a3
+; RV32IM-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, t1, a3
+; RV32IM-NEXT:    xor a4, a7, s8
+; RV32IM-NEXT:    xor a2, a2, t3
+; RV32IM-NEXT:    xor a0, a0, t0
+; RV32IM-NEXT:    lw a5, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a1, a1, a5
+; RV32IM-NEXT:    lw a5, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a3, a3, a5
+; RV32IM-NEXT:    lw a5, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a2, a2, s4
+; RV32IM-NEXT:    xor a0, a0, t4
+; RV32IM-NEXT:    lw a5, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a4, a4, a5
+; RV32IM-NEXT:    xor a0, a0, s0
+; RV32IM-NEXT:    lui a5, 349525
+; RV32IM-NEXT:    addi a5, a5, 1364
+; RV32IM-NEXT:    xor a3, a1, a3
+; RV32IM-NEXT:    slli a1, a1, 24
+; RV32IM-NEXT:    xor a3, a3, a4
+; RV32IM-NEXT:    lw a4, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    xor a0, a0, a4
+; RV32IM-NEXT:    xor a2, a3, a2
+; RV32IM-NEXT:    xor a0, a0, a6
+; RV32IM-NEXT:    lw a6, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a3, a2, a6
+; RV32IM-NEXT:    srli a4, a2, 8
+; RV32IM-NEXT:    xor a0, a2, a0
+; RV32IM-NEXT:    slli a3, a3, 8
+; RV32IM-NEXT:    and a2, a4, a6
+; RV32IM-NEXT:    srli a0, a0, 24
+; RV32IM-NEXT:    or a1, a1, a3
+; RV32IM-NEXT:    or a0, a2, a0
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 4
+; RV32IM-NEXT:    and a0, a0, s1
+; RV32IM-NEXT:    and a1, a1, s1
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 2
+; RV32IM-NEXT:    and a0, a0, s3
+; RV32IM-NEXT:    and a1, a1, s3
+; RV32IM-NEXT:    slli a0, a0, 2
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a1, a0, 1
+; RV32IM-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    and a0, a0, a2
+; RV32IM-NEXT:    and a1, a1, a5
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    or a0, a1, a0
+; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    addi sp, sp, 144
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: clmulh_i32:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    addi sp, sp, -512
+; RV64IM-NEXT:    sd ra, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s0, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s1, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s2, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s3, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s4, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s5, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s6, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s7, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s8, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s9, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s10, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    sd s11, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    srli a3, a0, 24
+; RV64IM-NEXT:    srli t0, a0, 8
+; RV64IM-NEXT:    li s1, 255
+; RV64IM-NEXT:    srli a5, a0, 40
+; RV64IM-NEXT:    lui a4, 16
+; RV64IM-NEXT:    srli t2, a0, 56
+; RV64IM-NEXT:    srliw t3, a0, 24
+; RV64IM-NEXT:    slli a2, a0, 56
+; RV64IM-NEXT:    lui t4, 61681
+; RV64IM-NEXT:    lui s0, 209715
+; RV64IM-NEXT:    lui s9, 349525
+; RV64IM-NEXT:    srli s7, a1, 24
+; RV64IM-NEXT:    srli s5, a1, 8
+; RV64IM-NEXT:    srli t5, a1, 40
+; RV64IM-NEXT:    srli a7, a1, 56
+; RV64IM-NEXT:    srliw ra, a1, 24
+; RV64IM-NEXT:    slli a6, a1, 56
+; RV64IM-NEXT:    li t1, 1
+; RV64IM-NEXT:    lui s11, 128
+; RV64IM-NEXT:    lui s2, 256
+; RV64IM-NEXT:    lui s3, 4096
+; RV64IM-NEXT:    lui t6, 8192
+; RV64IM-NEXT:    lui s8, 4080
+; RV64IM-NEXT:    and a3, a3, s8
+; RV64IM-NEXT:    slli s1, s1, 24
+; RV64IM-NEXT:    addi s10, a4, -256
+; RV64IM-NEXT:    and t0, t0, s1
+; RV64IM-NEXT:    sd s1, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a3, t0, a3
+; RV64IM-NEXT:    and t0, a0, s8
+; RV64IM-NEXT:    slli t3, t3, 32
+; RV64IM-NEXT:    addi s4, t4, -241
+; RV64IM-NEXT:    addi s6, s0, 819
+; RV64IM-NEXT:    addi a4, s9, 1365
+; RV64IM-NEXT:    and t4, s7, s8
+; RV64IM-NEXT:    and a5, a5, s10
+; RV64IM-NEXT:    or a5, a5, t2
+; RV64IM-NEXT:    and t2, a1, s8
+; RV64IM-NEXT:    slli s0, ra, 32
+; RV64IM-NEXT:    slli t0, t0, 24
+; RV64IM-NEXT:    or s9, t0, t3
+; RV64IM-NEXT:    slli t0, s4, 32
+; RV64IM-NEXT:    add s4, s4, t0
+; RV64IM-NEXT:    slli t0, s6, 32
+; RV64IM-NEXT:    add s6, s6, t0
+; RV64IM-NEXT:    slli s7, t1, 11
+; RV64IM-NEXT:    and t0, s5, s1
+; RV64IM-NEXT:    or t0, t0, t4
+; RV64IM-NEXT:    slli t4, t1, 32
+; RV64IM-NEXT:    and t3, t5, s10
+; RV64IM-NEXT:    or a7, t3, a7
+; RV64IM-NEXT:    slli ra, t1, 33
+; RV64IM-NEXT:    slli t2, t2, 24
+; RV64IM-NEXT:    or t2, t2, s0
+; RV64IM-NEXT:    slli s0, t1, 34
+; RV64IM-NEXT:    or a3, a3, a5
+; RV64IM-NEXT:    slli s1, t1, 35
+; RV64IM-NEXT:    sd s10, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s10
+; RV64IM-NEXT:    slli a0, a0, 40
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    slli a2, t1, 36
+; RV64IM-NEXT:    sd a2, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    or a2, t0, a7
+; RV64IM-NEXT:    slli a7, t1, 37
+; RV64IM-NEXT:    and a1, a1, s10
+; RV64IM-NEXT:    slli a1, a1, 40
+; RV64IM-NEXT:    or a1, a6, a1
+; RV64IM-NEXT:    sd a4, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a5, a4, 32
+; RV64IM-NEXT:    add a5, a4, a5
+; RV64IM-NEXT:    or a0, a0, s9
+; RV64IM-NEXT:    or a1, a1, t2
+; RV64IM-NEXT:    or a0, a0, a3
+; RV64IM-NEXT:    or a1, a1, a2
+; RV64IM-NEXT:    srli a2, a0, 4
+; RV64IM-NEXT:    sd s4, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s4
+; RV64IM-NEXT:    srli a3, a1, 4
+; RV64IM-NEXT:    and a1, a1, s4
+; RV64IM-NEXT:    and a2, a2, s4
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    and a3, a3, s4
+; RV64IM-NEXT:    slli a1, a1, 4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 2
+; RV64IM-NEXT:    sd s6, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a0, a0, s6
+; RV64IM-NEXT:    srli a3, a1, 2
+; RV64IM-NEXT:    and a1, a1, s6
+; RV64IM-NEXT:    and a2, a2, s6
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    and a3, a3, s6
+; RV64IM-NEXT:    slli a1, a1, 2
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a1, a3, a1
+; RV64IM-NEXT:    srli a2, a0, 1
+; RV64IM-NEXT:    and a0, a0, a5
+; RV64IM-NEXT:    srli a3, a1, 1
+; RV64IM-NEXT:    and a1, a1, a5
+; RV64IM-NEXT:    and a2, a2, a5
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    and a3, a3, a5
+; RV64IM-NEXT:    slli a1, a1, 1
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or s5, a3, a1
+; RV64IM-NEXT:    andi a1, s5, 2
+; RV64IM-NEXT:    andi a2, s5, 1
+; RV64IM-NEXT:    andi a3, s5, 4
+; RV64IM-NEXT:    andi a5, s5, 8
+; RV64IM-NEXT:    andi a6, s5, 16
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 32
+; RV64IM-NEXT:    mul a2, a0, a3
+; RV64IM-NEXT:    mul a3, a0, a5
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a2, s5, 256
+; RV64IM-NEXT:    mul a3, a0, a6
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a3, a1
+; RV64IM-NEXT:    sd a1, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    andi a1, s5, 512
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli s4, t1, 38
+; RV64IM-NEXT:    lui a1, 2
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    lui a2, 4
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 40
+; RV64IM-NEXT:    and a2, s5, s11
+; RV64IM-NEXT:    and a3, s5, s2
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 41
+; RV64IM-NEXT:    and a3, s5, s3
+; RV64IM-NEXT:    and a4, s5, t6
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    xor a3, a3, a4
+; RV64IM-NEXT:    sd a3, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a3, t1, 48
+; RV64IM-NEXT:    and a4, s5, t4
+; RV64IM-NEXT:    and a5, s5, ra
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a5, a0, a5
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    sd a4, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, t1, 49
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 56
+; RV64IM-NEXT:    and a2, s5, a3
+; RV64IM-NEXT:    and a3, s5, a4
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    xor a2, a2, a3
+; RV64IM-NEXT:    sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a2, t1, 57
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    and a2, s5, a2
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    mul a2, a0, a2
+; RV64IM-NEXT:    xor a1, a1, a2
+; RV64IM-NEXT:    sd a1, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli ra, t1, 39
+; RV64IM-NEXT:    slli a2, t1, 42
+; RV64IM-NEXT:    slli a4, t1, 43
+; RV64IM-NEXT:    slli s2, t1, 44
+; RV64IM-NEXT:    slli s3, t1, 45
+; RV64IM-NEXT:    slli s6, t1, 46
+; RV64IM-NEXT:    slli a1, t1, 47
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 50
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 51
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 52
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 53
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 54
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 55
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 58
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 59
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 60
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a1, t1, 61
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli t1, t1, 62
+; RV64IM-NEXT:    sd t1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and t1, s5, s7
+; RV64IM-NEXT:    lui a3, 1
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a3, 8
+; RV64IM-NEXT:    and a1, s5, a3
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui a1, 16
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s9, 32
+; RV64IM-NEXT:    and a1, s5, s9
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s11, 64
+; RV64IM-NEXT:    and a1, s5, s11
+; RV64IM-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s10, 512
+; RV64IM-NEXT:    and a1, s5, s10
+; RV64IM-NEXT:    sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui s8, 1024
+; RV64IM-NEXT:    and a1, s5, s8
+; RV64IM-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t0, 2048
+; RV64IM-NEXT:    and t0, s5, t0
+; RV64IM-NEXT:    lui t2, 16384
+; RV64IM-NEXT:    and t2, s5, t2
+; RV64IM-NEXT:    lui t3, 32768
+; RV64IM-NEXT:    and a1, s5, t3
+; RV64IM-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t4, 65536
+; RV64IM-NEXT:    and a1, s5, t4
+; RV64IM-NEXT:    sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    lui t5, 131072
+; RV64IM-NEXT:    and a5, s5, t5
+; RV64IM-NEXT:    lui t6, 262144
+; RV64IM-NEXT:    and a6, s5, t6
+; RV64IM-NEXT:    and s11, s5, s0
+; RV64IM-NEXT:    and t5, s5, s1
+; RV64IM-NEXT:    ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and t6, s5, a1
+; RV64IM-NEXT:    and a1, s5, a7
+; RV64IM-NEXT:    sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s4
+; RV64IM-NEXT:    sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and ra, s5, ra
+; RV64IM-NEXT:    and a1, s5, a2
+; RV64IM-NEXT:    sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, a4
+; RV64IM-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s2
+; RV64IM-NEXT:    sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s3
+; RV64IM-NEXT:    sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    and a1, s5, s6
+; RV64IM-NEXT:    sd a1, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a1, s5, a1
+; RV64IM-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s4, s5, a1
+; RV64IM-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s6, s5, a1
+; RV64IM-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s7, s5, a1
+; RV64IM-NEXT:    ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s8, s5, a1
+; RV64IM-NEXT:    ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s9, s5, a1
+; RV64IM-NEXT:    ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and s10, s5, a1
+; RV64IM-NEXT:    andi a1, s5, 64
+; RV64IM-NEXT:    andi a2, s5, 128
+; RV64IM-NEXT:    andi a3, s5, 1024
+; RV64IM-NEXT:    srliw a4, s5, 31
+; RV64IM-NEXT:    srli t3, s5, 63
+; RV64IM-NEXT:    mul s2, a0, a1
+; RV64IM-NEXT:    mul a1, a0, a2
+; RV64IM-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul s0, a0, a3
+; RV64IM-NEXT:    mul a1, a0, t1
+; RV64IM-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t4, a0, a1
+; RV64IM-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t1, a0, a1
+; RV64IM-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s3, a0, a1
+; RV64IM-NEXT:    mul a1, a0, t0
+; RV64IM-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a7, a0, t2
+; RV64IM-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s1, a0, a1
+; RV64IM-NEXT:    ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a5
+; RV64IM-NEXT:    sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, a6
+; RV64IM-NEXT:    sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    slli a4, a4, 31
+; RV64IM-NEXT:    mul a5, a0, s11
+; RV64IM-NEXT:    mul t2, a0, t5
+; RV64IM-NEXT:    mul s11, a0, t6
+; RV64IM-NEXT:    ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    mul a1, a0, ra
+; RV64IM-NEXT:    sd a1, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a2, a0, a1
+; RV64IM-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a6, a0, a1
+; RV64IM-NEXT:    ld a1, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t5, a0, a1
+; RV64IM-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul s5, a0, a1
+; RV64IM-NEXT:    ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a1, a0, a1
+; RV64IM-NEXT:    ld a3, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul a3, a0, a3
+; RV64IM-NEXT:    ld t0, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t0, a0, t0
+; RV64IM-NEXT:    ld t6, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul t6, a0, t6
+; RV64IM-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    mul ra, a0, ra
+; RV64IM-NEXT:    mul s4, a0, s4
+; RV64IM-NEXT:    mul s6, a0, s6
+; RV64IM-NEXT:    mul s7, a0, s7
+; RV64IM-NEXT:    mul s8, a0, s8
+; RV64IM-NEXT:    mul s9, a0, s9
+; RV64IM-NEXT:    mul s10, a0, s10
+; RV64IM-NEXT:    slli t3, t3, 63
+; RV64IM-NEXT:    mul a4, a0, a4
+; RV64IM-NEXT:    mul a0, a0, t3
+; RV64IM-NEXT:    sd a0, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT:    ld t3, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld a0, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t3, a0
+; RV64IM-NEXT:    ld a0, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s2, a0, s2
+; RV64IM-NEXT:    ld a0, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, a0, s0
+; RV64IM-NEXT:    ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, a0, t4
+; RV64IM-NEXT:    ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, a0, t1
+; RV64IM-NEXT:    ld a0, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a0, a7
+; RV64IM-NEXT:    ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a0, a5
+; RV64IM-NEXT:    ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a0, a2
+; RV64IM-NEXT:    ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a1, a0, a1
+; RV64IM-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s6, a0, s6
+; RV64IM-NEXT:    xor t3, t3, s2
+; RV64IM-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor s0, s0, a0
+; RV64IM-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t4, t4, a0
+; RV64IM-NEXT:    xor t1, t1, s3
+; RV64IM-NEXT:    xor a7, a7, s1
+; RV64IM-NEXT:    xor a5, a5, t2
+; RV64IM-NEXT:    xor a2, a2, a6
+; RV64IM-NEXT:    xor a1, a1, a3
+; RV64IM-NEXT:    xor a3, s6, s7
+; RV64IM-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a6, t3, a0
+; RV64IM-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t2, s0, a0
+; RV64IM-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t3, t4, a0
+; RV64IM-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t1, t1, a0
+; RV64IM-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    xor a5, a5, s11
+; RV64IM-NEXT:    xor a2, a2, t5
+; RV64IM-NEXT:    xor a1, a1, t0
+; RV64IM-NEXT:    xor a3, a3, s8
+; RV64IM-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor t0, t3, a0
+; RV64IM-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a0
+; RV64IM-NEXT:    xor a2, a2, s5
+; RV64IM-NEXT:    xor a1, a1, t6
+; RV64IM-NEXT:    xor a3, a3, s9
+; RV64IM-NEXT:    xor t2, a6, t2
+; RV64IM-NEXT:    xor t0, t2, t0
+; RV64IM-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a7, a7, a0
+; RV64IM-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a0
+; RV64IM-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a1, a1, ra
+; RV64IM-NEXT:    xor a3, a3, s10
+; RV64IM-NEXT:    xor t0, t0, t1
+; RV64IM-NEXT:    xor a4, a7, a4
+; RV64IM-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a5, a5, a0
+; RV64IM-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a2, a2, a0
+; RV64IM-NEXT:    xor a1, a1, s4
+; RV64IM-NEXT:    lui a7, %hi(.LCPI18_0)
+; RV64IM-NEXT:    ld a7, %lo(.LCPI18_0)(a7)
+; RV64IM-NEXT:    slli a6, a6, 56
+; RV64IM-NEXT:    ld a0, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    xor a0, a3, a0
+; RV64IM-NEXT:    ld t1, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a3, t0, t1
+; RV64IM-NEXT:    xor a4, t0, a4
+; RV64IM-NEXT:    slli a3, a3, 40
+; RV64IM-NEXT:    xor a4, a4, a5
+; RV64IM-NEXT:    or a3, a6, a3
+; RV64IM-NEXT:    lui t0, 4080
+; RV64IM-NEXT:    and a5, a4, t0
+; RV64IM-NEXT:    xor a2, a4, a2
+; RV64IM-NEXT:    srli a4, a4, 8
+; RV64IM-NEXT:    slli a5, a5, 24
+; RV64IM-NEXT:    xor a1, a2, a1
+; RV64IM-NEXT:    ld a6, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a4, a4, a6
+; RV64IM-NEXT:    srli a2, a2, 24
+; RV64IM-NEXT:    srliw a6, a1, 24
+; RV64IM-NEXT:    and a2, a2, t0
+; RV64IM-NEXT:    srli t0, a1, 40
+; RV64IM-NEXT:    xor a0, a1, a0
+; RV64IM-NEXT:    slli a6, a6, 32
+; RV64IM-NEXT:    or a2, a4, a2
+; RV64IM-NEXT:    and a1, t0, t1
+; RV64IM-NEXT:    srli a0, a0, 56
+; RV64IM-NEXT:    or a4, a5, a6
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    or a3, a3, a4
+; RV64IM-NEXT:    or a0, a2, a0
+; RV64IM-NEXT:    or a0, a3, a0
+; RV64IM-NEXT:    srli a1, a0, 4
+; RV64IM-NEXT:    ld a2, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 2
+; RV64IM-NEXT:    ld a2, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    slli a0, a0, 2
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    srli a1, a0, 1
+; RV64IM-NEXT:    ld a2, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    and a1, a1, a7
+; RV64IM-NEXT:    slli a0, a0, 1
+; RV64IM-NEXT:    or a0, a1, a0
+; RV64IM-NEXT:    slli a0, a0, 31
+; RV64IM-NEXT:    srli a0, a0, 32
+; RV64IM-NEXT:    ld ra, 504(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s0, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s1, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s2, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s3, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s4, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s5, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s6, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s7, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s8, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s9, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s10, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    ld s11, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT:    addi sp, sp, 512
+; RV64IM-NEXT:    ret
+  %a.ext = zext i32 %a to i64
+  %b.ext = zext i32 %b to i64
+  %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext)
+  %res.ext = lshr i64 %clmul, 32
+  %res = trunc i64 %res.ext to i32
+  ret i32 %res
+}
+
+define i4 @clmulh_constfold_i4() nounwind {
+; CHECK-LABEL: clmulh_constfold_i4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+  %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2)
+  %res.ext = lshr i8 %clmul, 4
+  %res = trunc i8 %res.ext to i4
+  ret i4 %res
+}
+
+define i16 @clmulh_constfold_i16() nounwind {
+; CHECK-LABEL: clmulh_constfold_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addi a0, a0, -1366
+; CHECK-NEXT:    ret
+  %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1)
+  %res.ext = lshr i32 %clmul, 16
+  %res = trunc i32 %res.ext to i16
+  ret i16 %res
+}

>From 26e6706998dd0de2a295649f0996dcaf39f929e0 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 20 Nov 2025 13:56:27 +0000
Subject: [PATCH 5/5] [ISel] Strip bad tests

---
 llvm/test/CodeGen/RISCV/clmul.ll | 46 --------------------------------
 1 file changed, 46 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
index 429d34a0f9851..8961e630700f3 100644
--- a/llvm/test/CodeGen/RISCV/clmul.ll
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -7334,29 +7334,6 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
   ret i32 %res
 }
 
-define i4 @clmulr_constfold_i4() nounwind {
-; CHECK-LABEL: clmulr_constfold_i4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    ret
-  %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2)
-  %res.ext = lshr i8 %clmul, 3
-  %res = trunc i8 %res.ext to i4
-  ret i4 %res
-}
-
-define i16 @clmulr_constfold_i16() nounwind {
-; CHECK-LABEL: clmulr_constfold_i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 5
-; CHECK-NEXT:    addi a0, a0, 1365
-; CHECK-NEXT:    ret
-  %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1)
-  %res.ext = lshr i32 %clmul, 15
-  %res = trunc i32 %res.ext to i16
-  ret i16 %res
-}
-
 define i4 @clmulh_i4(i4 %a, i4 %b) nounwind {
 ; RV32IM-LABEL: clmulh_i4:
 ; RV32IM:       # %bb.0:
@@ -11483,26 +11460,3 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
   %res = trunc i64 %res.ext to i32
   ret i32 %res
 }
-
-define i4 @clmulh_constfold_i4() nounwind {
-; CHECK-LABEL: clmulh_constfold_i4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    ret
-  %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2)
-  %res.ext = lshr i8 %clmul, 4
-  %res = trunc i8 %res.ext to i4
-  ret i4 %res
-}
-
-define i16 @clmulh_constfold_i16() nounwind {
-; CHECK-LABEL: clmulh_constfold_i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 11
-; CHECK-NEXT:    addi a0, a0, -1366
-; CHECK-NEXT:    ret
-  %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1)
-  %res.ext = lshr i32 %clmul, 16
-  %res = trunc i32 %res.ext to i16
-  ret i16 %res
-}


