[llvm] 3b5430e - [RISCV] Add a VL output to vleff intrinsics.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 21 17:20:53 PST 2021


Author: Craig Topper
Date: 2021-01-21T17:19:58-08:00
New Revision: 3b5430eb0dad5b239d0671503f73f6b713aaaf40

URL: https://github.com/llvm/llvm-project/commit/3b5430eb0dad5b239d0671503f73f6b713aaaf40
DIFF: https://github.com/llvm/llvm-project/commit/3b5430eb0dad5b239d0671503f73f6b713aaaf40.diff

LOG: [RISCV] Add a VL output to vleff intrinsics.

The fault-only-first-load instructions can reduce VL if an element
other than element 0 triggers a memory fault. This can be used to
vectorize loops with data-dependent exit conditions like strcmp or
strlen.

This patch adds a VL output to these intrinsics so that the new
VL value can be captured by software. This will be expanded to
'csrr gpr, vl' after the vleff instruction during SelectionDAG.

By doing this with one intrinsic we are able to guarantee that the
csrr reads the VL value produced by the vleff instruction. Having
it as a separate intrinsic would make it impossible to guarantee
ordering without making every other vector intrinsic have side
effects.

The intrinsics are expanded during lowering into two ISD nodes
that are glued together. These ISD nodes will go
through isel separately, but should maintain the glue so that they
get emitted adjacently by InstrEmitter.

I've only run the chain through the vleff instruction, allowing
the READ_VL to be deleted if it is unused.

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D94286

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsRISCV.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
    llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 38828baead12..659010399977 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -101,6 +101,16 @@ let TargetPrefix = "riscv" in {
                     [LLVMPointerType<LLVMMatchType<0>>,
                      llvm_anyint_ty],
                     [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+  // For unit stride fault-only-first load
+  // Input: (pointer, vl)
+  // Output: (data, vl)
+  // NOTE: We model this with default memory properties since we model writing
+  // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
+  class RISCVUSLoadFF
+        : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
+                    [LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
+                    [NoCapture<ArgIndex<0>>]>,
+                    RISCVVIntrinsic;
   // For unit stride load with mask
   // Input: (maskedoff, pointer, mask, vl)
   class RISCVUSLoadMask
@@ -110,6 +120,18 @@ let TargetPrefix = "riscv" in {
                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                      llvm_anyint_ty],
                     [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+  // For unit stride fault-only-first load with mask
+  // Input: (maskedoff, pointer, mask, vl)
+  // Output: (data, vl)
+  // NOTE: We model this with default memory properties since we model writing
+  // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
+  class RISCVUSLoadFFMask
+        : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
+                    [LLVMMatchType<0>,
+                     LLVMPointerType<LLVMMatchType<0>>,
+                     LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                     LLVMMatchType<1>],
+                    [NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic;
   // For strided load
   // Input: (pointer, stride, vl)
   class RISCVSLoad
@@ -564,6 +586,10 @@ let TargetPrefix = "riscv" in {
     def "int_riscv_" # NAME : RISCVUSLoad;
     def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask;
   }
+  multiclass RISCVUSLoadFF {
+    def "int_riscv_" # NAME : RISCVUSLoadFF;
+    def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask;
+  }
   multiclass RISCVSLoad {
     def "int_riscv_" # NAME : RISCVSLoad;
     def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask;
@@ -680,7 +706,7 @@ let TargetPrefix = "riscv" in {
   }
 
   defm vle : RISCVUSLoad;
-  defm vleff : RISCVUSLoad;
+  defm vleff : RISCVUSLoadFF;
   defm vse : RISCVUSStore;
   defm vlse: RISCVSLoad;
   defm vsse: RISCVSStore;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 897b56f97085..f78e0a236b63 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -362,6 +362,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
 
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
@@ -1367,7 +1369,29 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     }
   }
 
-  return SDValue();
+  switch (IntNo) {
+  default:
+    return SDValue(); // Don't custom lower most intrinsics.
+  case Intrinsic::riscv_vleff: {
+    SDLoc DL(Op);
+    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
+    SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
+                               Op.getOperand(2), Op.getOperand(3));
+    VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
+    SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
+    return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
+  }
+  case Intrinsic::riscv_vleff_mask: {
+    SDLoc DL(Op);
+    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
+    SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
+                               Op.getOperand(2), Op.getOperand(3),
+                               Op.getOperand(4), Op.getOperand(5));
+    VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
+    SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
+    return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
+  }
+  }
 }
 
 // Returns the opcode of the target-specific SDNode that implements the 32-bit
@@ -3815,6 +3839,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(SPLAT_VECTOR_I64)
   NODE_NAME_CASE(READ_VLENB)
   NODE_NAME_CASE(TRUNCATE_VECTOR)
+  NODE_NAME_CASE(VLEFF)
+  NODE_NAME_CASE(VLEFF_MASK)
+  NODE_NAME_CASE(READ_VL)
   }
   // clang-format on
   return nullptr;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 58bb331ccd01..d0a0ea43a1f0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -95,6 +95,11 @@ enum NodeType : unsigned {
   READ_VLENB,
   // Truncates a RVV integer vector by one power-of-two.
   TRUNCATE_VECTOR,
+  // Unit-stride fault-only-first load
+  VLEFF,
+  VLEFF_MASK,
+  // read vl CSR
+  READ_VL,
 };
 } // namespace RISCVISD
 

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 029561cd2fff..4650c75b77fc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -20,6 +20,23 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
 def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
                               SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
 
+def riscv_vleff : SDNode<"RISCVISD::VLEFF",
+                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
+                                              SDTCisVT<2, XLenVT>]>,
+                         [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+                          SDNPSideEffect]>;
+def riscv_vleff_mask : SDNode<"RISCVISD::VLEFF_MASK",
+                              SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                                   SDTCisSameAs<0, 1>,
+                                                   SDTCisPtrTy<2>,
+                                                   SDTCVecEltisVT<3, i1>,
+                                                   SDTCisVT<4, XLenVT>]>,
+                              [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+                               SDNPSideEffect]>;
+def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
+                           SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>,
+                           [SDNPInGlue]>;
+
 // X0 has special meaning for vsetvl/vsetvli.
 //  rd | rs1 |   AVL value | Effect on vl
 //--------------------------------------------------------------
@@ -1903,6 +1920,23 @@ multiclass VPatUSLoad<string intrinsic,
                                 $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
 }
 
+multiclass VPatUSLoadFF<string inst,
+                        LLVMType type,
+                        LLVMType mask_type,
+                        int sew,
+                        LMULInfo vlmul,
+                        VReg reg_class>
+{
+    defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
+    def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)),
+                    (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+    defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
+    def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
+                                      GPR:$rs1, (mask_type V0), GPR:$vl)),
+                    (PseudoMask $merge,
+                                $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+}
+
 multiclass VPatSLoad<string intrinsic,
                      string inst,
                      LLVMType type,
@@ -2817,6 +2851,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
                                [(set GPR:$rd, (riscv_read_vlenb))]>;
 }
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,
+    Uses = [VL] in
+def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
+                          [(set GPR:$rd, (riscv_read_vl))]>;
+
 //===----------------------------------------------------------------------===//
 // 6. Configuration-Setting Instructions
 //===----------------------------------------------------------------------===//
@@ -3388,9 +3427,8 @@ foreach vti = AllVectors in
   defm : VPatUSLoad<"int_riscv_vle",
                     "PseudoVLE" # vti.SEW,
                     vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
-  defm : VPatUSLoad<"int_riscv_vleff",
-                    "PseudoVLE" # vti.SEW # "FF",
-                    vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
+  defm : VPatUSLoadFF<"PseudoVLE" # vti.SEW # "FF",
+                      vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
   defm : VPatUSStore<"int_riscv_vse",
                      "PseudoVSE" # vti.SEW,
                      vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;

diff  --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index 31dc47a15787..50510347f89a 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -219,4 +219,11 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
     return;
   }
 
+  if (OutMI.getOpcode() == RISCV::PseudoReadVL) {
+    OutMI.setOpcode(RISCV::CSRRS);
+    OutMI.addOperand(MCOperand::createImm(
+        RISCVSysReg::lookupSysRegByName("VL")->Encoding));
+    OutMI.addOperand(MCOperand::createReg(RISCV::X0));
+    return;
+  }
 }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
index c8bd5718882a..d2b1b86d2ec6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
@@ -1,1162 +1,1719 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i32> @llvm.riscv.vleff.nxv1i32(
+declare { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+  <vscale x 1 x double>*,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vleff_v_nxv1f64_nxv1f64(<vscale x 1 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i32 %1)
+  %b = extractvalue { <vscale x 1 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x double>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 1 x double> %b
+}
+
+declare { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>*,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vleff_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+  %b = extractvalue { <vscale x 1 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x double>, i32 } %a, 1
+  store i32 %c, i32* %4
+
+  ret <vscale x 1 x double> %b
+}
+
+declare { <vscale x 2 x double>, i32 } @llvm.riscv.vleff.nxv2f64(
+  <vscale x 2 x double>*,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vleff_v_nxv2f64_nxv2f64(<vscale x 2 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 2 x double>, i32 } @llvm.riscv.vleff.nxv2f64(
+    <vscale x 2 x double>* %0,
+    i32 %1)
+  %b = extractvalue { <vscale x 2 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x double>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 2 x double> %b
+}
+
+declare { <vscale x 2 x double>, i32 } @llvm.riscv.vleff.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>*,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vleff_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double>* %1, <vscale x 2 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m2,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 2 x double>, i32 } @llvm.riscv.vleff.mask.nxv2f64(
+    <vscale x 2 x double> %0,
+    <vscale x 2 x double>* %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+  %b = extractvalue { <vscale x 2 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x double>, i32 } %a, 1
+  store i32 %c, i32* %4
+
+  ret <vscale x 2 x double> %b
+}
+
+declare { <vscale x 4 x double>, i32 } @llvm.riscv.vleff.nxv4f64(
+  <vscale x 4 x double>*,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vleff_v_nxv4f64_nxv4f64(<vscale x 4 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 4 x double>, i32 } @llvm.riscv.vleff.nxv4f64(
+    <vscale x 4 x double>* %0,
+    i32 %1)
+  %b = extractvalue { <vscale x 4 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x double>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 4 x double> %b
+}
+
+declare { <vscale x 4 x double>, i32 } @llvm.riscv.vleff.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>*,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vleff_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double>* %1, <vscale x 4 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m4,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 4 x double>, i32 } @llvm.riscv.vleff.mask.nxv4f64(
+    <vscale x 4 x double> %0,
+    <vscale x 4 x double>* %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+  %b = extractvalue { <vscale x 4 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x double>, i32 } %a, 1
+  store i32 %c, i32* %4
+
+  ret <vscale x 4 x double> %b
+}
+
+declare { <vscale x 8 x double>, i32 } @llvm.riscv.vleff.nxv8f64(
+  <vscale x 8 x double>*,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vleff_v_nxv8f64_nxv8f64(<vscale x 8 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 8 x double>, i32 } @llvm.riscv.vleff.nxv8f64(
+    <vscale x 8 x double>* %0,
+    i32 %1)
+  %b = extractvalue { <vscale x 8 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x double>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 8 x double> %b
+}
+
+declare { <vscale x 8 x double>, i32 } @llvm.riscv.vleff.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>*,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vleff_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double>* %1, <vscale x 8 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m8,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 8 x double>, i32 } @llvm.riscv.vleff.mask.nxv8f64(
+    <vscale x 8 x double> %0,
+    <vscale x 8 x double>* %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+  %b = extractvalue { <vscale x 8 x double>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x double>, i32 } %a, 1
+  store i32 %c, i32* %4
+
+  ret <vscale x 8 x double> %b
+}
+
+declare { <vscale x 1 x i32>, i32 } @llvm.riscv.vleff.nxv1i32(
   <vscale x 1 x i32>*,
   i32);
 
-define <vscale x 1 x i32> @intrinsic_vleff_v_nxv1i32_nxv1i32(<vscale x 1 x i32>* %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vleff_v_nxv1i32_nxv1i32(<vscale x 1 x i32>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vleff.nxv1i32(
+  %a = call { <vscale x 1 x i32>, i32 } @llvm.riscv.vleff.nxv1i32(
     <vscale x 1 x i32>* %0,
     i32 %1)
-
-  ret <vscale x 1 x i32> %a
+  %b = extractvalue { <vscale x 1 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x i32>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 1 x i32> %b
 }
 
-declare <vscale x 1 x i32> @llvm.riscv.vleff.mask.nxv1i32(
+declare { <vscale x 1 x i32>, i32 } @llvm.riscv.vleff.mask.nxv1i32(
   <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i1>,
   i32);
 
-define <vscale x 1 x i32> @intrinsic_vleff_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vleff_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vleff.mask.nxv1i32(
+  %a = call { <vscale x 1 x i32>, i32 } @llvm.riscv.vleff.mask.nxv1i32(
     <vscale x 1 x i32> %0,
     <vscale x 1 x i32>* %1,
     <vscale x 1 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 1 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x i32>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 1 x i32> %a
+  ret <vscale x 1 x i32> %b
 }
 
-declare <vscale x 2 x i32> @llvm.riscv.vleff.nxv2i32(
+declare { <vscale x 2 x i32>, i32 } @llvm.riscv.vleff.nxv2i32(
   <vscale x 2 x i32>*,
   i32);
 
-define <vscale x 2 x i32> @intrinsic_vleff_v_nxv2i32_nxv2i32(<vscale x 2 x i32>* %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vleff_v_nxv2i32_nxv2i32(<vscale x 2 x i32>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vleff.nxv2i32(
+  %a = call { <vscale x 2 x i32>, i32 } @llvm.riscv.vleff.nxv2i32(
     <vscale x 2 x i32>* %0,
     i32 %1)
-
-  ret <vscale x 2 x i32> %a
+  %b = extractvalue { <vscale x 2 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x i32>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 2 x i32> %b
 }
 
-declare <vscale x 2 x i32> @llvm.riscv.vleff.mask.nxv2i32(
+declare { <vscale x 2 x i32>, i32 } @llvm.riscv.vleff.mask.nxv2i32(
   <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i1>,
   i32);
 
-define <vscale x 2 x i32> @intrinsic_vleff_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32>* %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vleff_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32>* %1, <vscale x 2 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vleff.mask.nxv2i32(
+  %a = call { <vscale x 2 x i32>, i32 } @llvm.riscv.vleff.mask.nxv2i32(
     <vscale x 2 x i32> %0,
     <vscale x 2 x i32>* %1,
     <vscale x 2 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 2 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x i32>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 2 x i32> %a
+  ret <vscale x 2 x i32> %b
 }
 
-declare <vscale x 4 x i32> @llvm.riscv.vleff.nxv4i32(
+declare { <vscale x 4 x i32>, i32 } @llvm.riscv.vleff.nxv4i32(
   <vscale x 4 x i32>*,
   i32);
 
-define <vscale x 4 x i32> @intrinsic_vleff_v_nxv4i32_nxv4i32(<vscale x 4 x i32>* %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vleff_v_nxv4i32_nxv4i32(<vscale x 4 x i32>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vleff.nxv4i32(
+  %a = call { <vscale x 4 x i32>, i32 } @llvm.riscv.vleff.nxv4i32(
     <vscale x 4 x i32>* %0,
     i32 %1)
-
-  ret <vscale x 4 x i32> %a
+  %b = extractvalue { <vscale x 4 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x i32>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 4 x i32> %b
 }
 
-declare <vscale x 4 x i32> @llvm.riscv.vleff.mask.nxv4i32(
+declare { <vscale x 4 x i32>, i32 } @llvm.riscv.vleff.mask.nxv4i32(
   <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i1>,
   i32);
 
-define <vscale x 4 x i32> @intrinsic_vleff_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32>* %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vleff_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32>* %1, <vscale x 4 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vleff.mask.nxv4i32(
+  %a = call { <vscale x 4 x i32>, i32 } @llvm.riscv.vleff.mask.nxv4i32(
     <vscale x 4 x i32> %0,
     <vscale x 4 x i32>* %1,
     <vscale x 4 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 4 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x i32>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 4 x i32> %a
+  ret <vscale x 4 x i32> %b
 }
 
-declare <vscale x 8 x i32> @llvm.riscv.vleff.nxv8i32(
+declare { <vscale x 8 x i32>, i32 } @llvm.riscv.vleff.nxv8i32(
   <vscale x 8 x i32>*,
   i32);
 
-define <vscale x 8 x i32> @intrinsic_vleff_v_nxv8i32_nxv8i32(<vscale x 8 x i32>* %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vleff_v_nxv8i32_nxv8i32(<vscale x 8 x i32>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vleff.nxv8i32(
+  %a = call { <vscale x 8 x i32>, i32 } @llvm.riscv.vleff.nxv8i32(
     <vscale x 8 x i32>* %0,
     i32 %1)
-
-  ret <vscale x 8 x i32> %a
+  %b = extractvalue { <vscale x 8 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x i32>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 8 x i32> %b
 }
 
-declare <vscale x 8 x i32> @llvm.riscv.vleff.mask.nxv8i32(
+declare { <vscale x 8 x i32>, i32 } @llvm.riscv.vleff.mask.nxv8i32(
   <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i1>,
   i32);
 
-define <vscale x 8 x i32> @intrinsic_vleff_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32>* %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vleff_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32>* %1, <vscale x 8 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vleff.mask.nxv8i32(
+  %a = call { <vscale x 8 x i32>, i32 } @llvm.riscv.vleff.mask.nxv8i32(
     <vscale x 8 x i32> %0,
     <vscale x 8 x i32>* %1,
     <vscale x 8 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 8 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x i32>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 8 x i32> %a
+  ret <vscale x 8 x i32> %b
 }
 
-declare <vscale x 16 x i32> @llvm.riscv.vleff.nxv16i32(
+declare { <vscale x 16 x i32>, i32 } @llvm.riscv.vleff.nxv16i32(
   <vscale x 16 x i32>*,
   i32);
 
-define <vscale x 16 x i32> @intrinsic_vleff_v_nxv16i32_nxv16i32(<vscale x 16 x i32>* %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vleff_v_nxv16i32_nxv16i32(<vscale x 16 x i32>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16i32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vleff.nxv16i32(
+  %a = call { <vscale x 16 x i32>, i32 } @llvm.riscv.vleff.nxv16i32(
     <vscale x 16 x i32>* %0,
     i32 %1)
-
-  ret <vscale x 16 x i32> %a
+  %b = extractvalue { <vscale x 16 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x i32>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 16 x i32> %b
 }
 
-declare <vscale x 16 x i32> @llvm.riscv.vleff.mask.nxv16i32(
+declare { <vscale x 16 x i32>, i32 } @llvm.riscv.vleff.mask.nxv16i32(
   <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i1>,
   i32);
 
-define <vscale x 16 x i32> @intrinsic_vleff_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32>* %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vleff_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32>* %1, <vscale x 16 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vleff.mask.nxv16i32(
+  %a = call { <vscale x 16 x i32>, i32 } @llvm.riscv.vleff.mask.nxv16i32(
     <vscale x 16 x i32> %0,
     <vscale x 16 x i32>* %1,
     <vscale x 16 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 16 x i32>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x i32>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 16 x i32> %a
+  ret <vscale x 16 x i32> %b
 }
 
-declare <vscale x 1 x float> @llvm.riscv.vleff.nxv1f32(
+declare { <vscale x 1 x float>, i32 } @llvm.riscv.vleff.nxv1f32(
   <vscale x 1 x float>*,
   i32);
 
-define <vscale x 1 x float> @intrinsic_vleff_v_nxv1f32_nxv1f32(<vscale x 1 x float>* %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vleff_v_nxv1f32_nxv1f32(<vscale x 1 x float>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vleff.nxv1f32(
+  %a = call { <vscale x 1 x float>, i32 } @llvm.riscv.vleff.nxv1f32(
     <vscale x 1 x float>* %0,
     i32 %1)
-
-  ret <vscale x 1 x float> %a
+  %b = extractvalue { <vscale x 1 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x float>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 1 x float> %b
 }
 
-declare <vscale x 1 x float> @llvm.riscv.vleff.mask.nxv1f32(
+declare { <vscale x 1 x float>, i32 } @llvm.riscv.vleff.mask.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i1>,
   i32);
 
-define <vscale x 1 x float> @intrinsic_vleff_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vleff_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vleff.mask.nxv1f32(
+  %a = call { <vscale x 1 x float>, i32 } @llvm.riscv.vleff.mask.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float>* %1,
     <vscale x 1 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 1 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x float>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 1 x float> %a
+  ret <vscale x 1 x float> %b
 }
 
-declare <vscale x 2 x float> @llvm.riscv.vleff.nxv2f32(
+declare { <vscale x 2 x float>, i32 } @llvm.riscv.vleff.nxv2f32(
   <vscale x 2 x float>*,
   i32);
 
-define <vscale x 2 x float> @intrinsic_vleff_v_nxv2f32_nxv2f32(<vscale x 2 x float>* %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vleff_v_nxv2f32_nxv2f32(<vscale x 2 x float>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vleff.nxv2f32(
+  %a = call { <vscale x 2 x float>, i32 } @llvm.riscv.vleff.nxv2f32(
     <vscale x 2 x float>* %0,
     i32 %1)
-
-  ret <vscale x 2 x float> %a
+  %b = extractvalue { <vscale x 2 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x float>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 2 x float> %b
 }
 
-declare <vscale x 2 x float> @llvm.riscv.vleff.mask.nxv2f32(
+declare { <vscale x 2 x float>, i32 } @llvm.riscv.vleff.mask.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i1>,
   i32);
 
-define <vscale x 2 x float> @intrinsic_vleff_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float>* %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vleff_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float>* %1, <vscale x 2 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vleff.mask.nxv2f32(
+  %a = call { <vscale x 2 x float>, i32 } @llvm.riscv.vleff.mask.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float>* %1,
     <vscale x 2 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 2 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x float>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 2 x float> %a
+  ret <vscale x 2 x float> %b
 }
 
-declare <vscale x 4 x float> @llvm.riscv.vleff.nxv4f32(
+declare { <vscale x 4 x float>, i32 } @llvm.riscv.vleff.nxv4f32(
   <vscale x 4 x float>*,
   i32);
 
-define <vscale x 4 x float> @intrinsic_vleff_v_nxv4f32_nxv4f32(<vscale x 4 x float>* %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vleff_v_nxv4f32_nxv4f32(<vscale x 4 x float>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vleff.nxv4f32(
+  %a = call { <vscale x 4 x float>, i32 } @llvm.riscv.vleff.nxv4f32(
     <vscale x 4 x float>* %0,
     i32 %1)
-
-  ret <vscale x 4 x float> %a
+  %b = extractvalue { <vscale x 4 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x float>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 4 x float> %b
 }
 
-declare <vscale x 4 x float> @llvm.riscv.vleff.mask.nxv4f32(
+declare { <vscale x 4 x float>, i32 } @llvm.riscv.vleff.mask.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i1>,
   i32);
 
-define <vscale x 4 x float> @intrinsic_vleff_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float>* %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vleff_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float>* %1, <vscale x 4 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vleff.mask.nxv4f32(
+  %a = call { <vscale x 4 x float>, i32 } @llvm.riscv.vleff.mask.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float>* %1,
     <vscale x 4 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 4 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x float>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 4 x float> %a
+  ret <vscale x 4 x float> %b
 }
 
-declare <vscale x 8 x float> @llvm.riscv.vleff.nxv8f32(
+declare { <vscale x 8 x float>, i32 } @llvm.riscv.vleff.nxv8f32(
   <vscale x 8 x float>*,
   i32);
 
-define <vscale x 8 x float> @intrinsic_vleff_v_nxv8f32_nxv8f32(<vscale x 8 x float>* %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vleff_v_nxv8f32_nxv8f32(<vscale x 8 x float>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vleff.nxv8f32(
+  %a = call { <vscale x 8 x float>, i32 } @llvm.riscv.vleff.nxv8f32(
     <vscale x 8 x float>* %0,
     i32 %1)
-
-  ret <vscale x 8 x float> %a
+  %b = extractvalue { <vscale x 8 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x float>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 8 x float> %b
 }
 
-declare <vscale x 8 x float> @llvm.riscv.vleff.mask.nxv8f32(
+declare { <vscale x 8 x float>, i32 } @llvm.riscv.vleff.mask.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i1>,
   i32);
 
-define <vscale x 8 x float> @intrinsic_vleff_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float>* %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vleff_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float>* %1, <vscale x 8 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vleff.mask.nxv8f32(
+  %a = call { <vscale x 8 x float>, i32 } @llvm.riscv.vleff.mask.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float>* %1,
     <vscale x 8 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 8 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x float>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 8 x float> %a
+  ret <vscale x 8 x float> %b
 }
 
-declare <vscale x 16 x float> @llvm.riscv.vleff.nxv16f32(
+declare { <vscale x 16 x float>, i32 } @llvm.riscv.vleff.nxv16f32(
   <vscale x 16 x float>*,
   i32);
 
-define <vscale x 16 x float> @intrinsic_vleff_v_nxv16f32_nxv16f32(<vscale x 16 x float>* %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vleff_v_nxv16f32_nxv16f32(<vscale x 16 x float>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vleff.nxv16f32(
+  %a = call { <vscale x 16 x float>, i32 } @llvm.riscv.vleff.nxv16f32(
     <vscale x 16 x float>* %0,
     i32 %1)
-
-  ret <vscale x 16 x float> %a
+  %b = extractvalue { <vscale x 16 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x float>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 16 x float> %b
 }
 
-declare <vscale x 16 x float> @llvm.riscv.vleff.mask.nxv16f32(
+declare { <vscale x 16 x float>, i32 } @llvm.riscv.vleff.mask.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i1>,
   i32);
 
-define <vscale x 16 x float> @intrinsic_vleff_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float>* %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vleff_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float>* %1, <vscale x 16 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vleff.mask.nxv16f32(
+  %a = call { <vscale x 16 x float>, i32 } @llvm.riscv.vleff.mask.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float>* %1,
     <vscale x 16 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 16 x float>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x float>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 16 x float> %a
+  ret <vscale x 16 x float> %b
 }
 
-declare <vscale x 1 x i16> @llvm.riscv.vleff.nxv1i16(
+declare { <vscale x 1 x i16>, i32 } @llvm.riscv.vleff.nxv1i16(
   <vscale x 1 x i16>*,
   i32);
 
-define <vscale x 1 x i16> @intrinsic_vleff_v_nxv1i16_nxv1i16(<vscale x 1 x i16>* %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vleff_v_nxv1i16_nxv1i16(<vscale x 1 x i16>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vleff.nxv1i16(
+  %a = call { <vscale x 1 x i16>, i32 } @llvm.riscv.vleff.nxv1i16(
     <vscale x 1 x i16>* %0,
     i32 %1)
-
-  ret <vscale x 1 x i16> %a
+  %b = extractvalue { <vscale x 1 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x i16>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 1 x i16> %b
 }
 
-declare <vscale x 1 x i16> @llvm.riscv.vleff.mask.nxv1i16(
+declare { <vscale x 1 x i16>, i32 } @llvm.riscv.vleff.mask.nxv1i16(
   <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i1>,
   i32);
 
-define <vscale x 1 x i16> @intrinsic_vleff_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vleff_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vleff.mask.nxv1i16(
+  %a = call { <vscale x 1 x i16>, i32 } @llvm.riscv.vleff.mask.nxv1i16(
     <vscale x 1 x i16> %0,
     <vscale x 1 x i16>* %1,
     <vscale x 1 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 1 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x i16>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 1 x i16> %a
+  ret <vscale x 1 x i16> %b
 }
 
-declare <vscale x 2 x i16> @llvm.riscv.vleff.nxv2i16(
+declare { <vscale x 2 x i16>, i32 } @llvm.riscv.vleff.nxv2i16(
   <vscale x 2 x i16>*,
   i32);
 
-define <vscale x 2 x i16> @intrinsic_vleff_v_nxv2i16_nxv2i16(<vscale x 2 x i16>* %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vleff_v_nxv2i16_nxv2i16(<vscale x 2 x i16>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vleff.nxv2i16(
+  %a = call { <vscale x 2 x i16>, i32 } @llvm.riscv.vleff.nxv2i16(
     <vscale x 2 x i16>* %0,
     i32 %1)
-
-  ret <vscale x 2 x i16> %a
+  %b = extractvalue { <vscale x 2 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x i16>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 2 x i16> %b
 }
 
-declare <vscale x 2 x i16> @llvm.riscv.vleff.mask.nxv2i16(
+declare { <vscale x 2 x i16>, i32 } @llvm.riscv.vleff.mask.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i1>,
   i32);
 
-define <vscale x 2 x i16> @intrinsic_vleff_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16>* %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vleff_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16>* %1, <vscale x 2 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vleff.mask.nxv2i16(
+  %a = call { <vscale x 2 x i16>, i32 } @llvm.riscv.vleff.mask.nxv2i16(
     <vscale x 2 x i16> %0,
     <vscale x 2 x i16>* %1,
     <vscale x 2 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 2 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x i16>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 2 x i16> %a
+  ret <vscale x 2 x i16> %b
 }
 
-declare <vscale x 4 x i16> @llvm.riscv.vleff.nxv4i16(
+declare { <vscale x 4 x i16>, i32 } @llvm.riscv.vleff.nxv4i16(
   <vscale x 4 x i16>*,
   i32);
 
-define <vscale x 4 x i16> @intrinsic_vleff_v_nxv4i16_nxv4i16(<vscale x 4 x i16>* %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vleff_v_nxv4i16_nxv4i16(<vscale x 4 x i16>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vleff.nxv4i16(
+  %a = call { <vscale x 4 x i16>, i32 } @llvm.riscv.vleff.nxv4i16(
     <vscale x 4 x i16>* %0,
     i32 %1)
-
-  ret <vscale x 4 x i16> %a
+  %b = extractvalue { <vscale x 4 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x i16>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 4 x i16> %b
 }
 
-declare <vscale x 4 x i16> @llvm.riscv.vleff.mask.nxv4i16(
+declare { <vscale x 4 x i16>, i32 } @llvm.riscv.vleff.mask.nxv4i16(
   <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i1>,
   i32);
 
-define <vscale x 4 x i16> @intrinsic_vleff_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16>* %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vleff_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16>* %1, <vscale x 4 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vleff.mask.nxv4i16(
+  %a = call { <vscale x 4 x i16>, i32 } @llvm.riscv.vleff.mask.nxv4i16(
     <vscale x 4 x i16> %0,
     <vscale x 4 x i16>* %1,
     <vscale x 4 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 4 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x i16>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 4 x i16> %a
+  ret <vscale x 4 x i16> %b
 }
 
-declare <vscale x 8 x i16> @llvm.riscv.vleff.nxv8i16(
+declare { <vscale x 8 x i16>, i32 } @llvm.riscv.vleff.nxv8i16(
   <vscale x 8 x i16>*,
   i32);
 
-define <vscale x 8 x i16> @intrinsic_vleff_v_nxv8i16_nxv8i16(<vscale x 8 x i16>* %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vleff_v_nxv8i16_nxv8i16(<vscale x 8 x i16>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vleff.nxv8i16(
+  %a = call { <vscale x 8 x i16>, i32 } @llvm.riscv.vleff.nxv8i16(
     <vscale x 8 x i16>* %0,
     i32 %1)
-
-  ret <vscale x 8 x i16> %a
+  %b = extractvalue { <vscale x 8 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x i16>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 8 x i16> %b
 }
 
-declare <vscale x 8 x i16> @llvm.riscv.vleff.mask.nxv8i16(
+declare { <vscale x 8 x i16>, i32 } @llvm.riscv.vleff.mask.nxv8i16(
   <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i1>,
   i32);
 
-define <vscale x 8 x i16> @intrinsic_vleff_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16>* %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vleff_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16>* %1, <vscale x 8 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vleff.mask.nxv8i16(
+  %a = call { <vscale x 8 x i16>, i32 } @llvm.riscv.vleff.mask.nxv8i16(
     <vscale x 8 x i16> %0,
     <vscale x 8 x i16>* %1,
     <vscale x 8 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 8 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x i16>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 8 x i16> %a
+  ret <vscale x 8 x i16> %b
 }
 
-declare <vscale x 16 x i16> @llvm.riscv.vleff.nxv16i16(
+declare { <vscale x 16 x i16>, i32 } @llvm.riscv.vleff.nxv16i16(
   <vscale x 16 x i16>*,
   i32);
 
-define <vscale x 16 x i16> @intrinsic_vleff_v_nxv16i16_nxv16i16(<vscale x 16 x i16>* %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vleff_v_nxv16i16_nxv16i16(<vscale x 16 x i16>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vleff.nxv16i16(
+  %a = call { <vscale x 16 x i16>, i32 } @llvm.riscv.vleff.nxv16i16(
     <vscale x 16 x i16>* %0,
     i32 %1)
-
-  ret <vscale x 16 x i16> %a
+  %b = extractvalue { <vscale x 16 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x i16>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 16 x i16> %b
 }
 
-declare <vscale x 16 x i16> @llvm.riscv.vleff.mask.nxv16i16(
+declare { <vscale x 16 x i16>, i32 } @llvm.riscv.vleff.mask.nxv16i16(
   <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i1>,
   i32);
 
-define <vscale x 16 x i16> @intrinsic_vleff_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16>* %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vleff_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16>* %1, <vscale x 16 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vleff.mask.nxv16i16(
+  %a = call { <vscale x 16 x i16>, i32 } @llvm.riscv.vleff.mask.nxv16i16(
     <vscale x 16 x i16> %0,
     <vscale x 16 x i16>* %1,
     <vscale x 16 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 16 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x i16>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 16 x i16> %a
+  ret <vscale x 16 x i16> %b
 }
 
-declare <vscale x 32 x i16> @llvm.riscv.vleff.nxv32i16(
+declare { <vscale x 32 x i16>, i32 } @llvm.riscv.vleff.nxv32i16(
   <vscale x 32 x i16>*,
   i32);
 
-define <vscale x 32 x i16> @intrinsic_vleff_v_nxv32i16_nxv32i16(<vscale x 32 x i16>* %0, i32 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vleff_v_nxv32i16_nxv32i16(<vscale x 32 x i16>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv32i16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vleff.nxv32i16(
+  %a = call { <vscale x 32 x i16>, i32 } @llvm.riscv.vleff.nxv32i16(
     <vscale x 32 x i16>* %0,
     i32 %1)
-
-  ret <vscale x 32 x i16> %a
+  %b = extractvalue { <vscale x 32 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 32 x i16>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 32 x i16> %b
 }
 
-declare <vscale x 32 x i16> @llvm.riscv.vleff.mask.nxv32i16(
+declare { <vscale x 32 x i16>, i32 } @llvm.riscv.vleff.mask.nxv32i16(
   <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i1>,
   i32);
 
-define <vscale x 32 x i16> @intrinsic_vleff_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16>* %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i16> @intrinsic_vleff_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16>* %1, <vscale x 32 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vleff.mask.nxv32i16(
+  %a = call { <vscale x 32 x i16>, i32 } @llvm.riscv.vleff.mask.nxv32i16(
     <vscale x 32 x i16> %0,
     <vscale x 32 x i16>* %1,
     <vscale x 32 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 32 x i16>, i32 } %a, 0
+  %c = extractvalue { <vscale x 32 x i16>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 32 x i16> %a
+  ret <vscale x 32 x i16> %b
 }
 
-declare <vscale x 1 x half> @llvm.riscv.vleff.nxv1f16(
+declare { <vscale x 1 x half>, i32 } @llvm.riscv.vleff.nxv1f16(
   <vscale x 1 x half>*,
   i32);
 
-define <vscale x 1 x half> @intrinsic_vleff_v_nxv1f16_nxv1f16(<vscale x 1 x half>* %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv1f16_nxv1f16:
+define <vscale x 1 x half> @intrinsic_vleff_v_nxv1half_nxv1f16(<vscale x 1 x half>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv1half_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vleff.nxv1f16(
+  %a = call { <vscale x 1 x half>, i32 } @llvm.riscv.vleff.nxv1f16(
     <vscale x 1 x half>* %0,
     i32 %1)
-
-  ret <vscale x 1 x half> %a
+  %b = extractvalue { <vscale x 1 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x half>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 1 x half> %b
 }
 
-declare <vscale x 1 x half> @llvm.riscv.vleff.mask.nxv1f16(
+declare { <vscale x 1 x half>, i32 } @llvm.riscv.vleff.mask.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i1>,
   i32);
 
-define <vscale x 1 x half> @intrinsic_vleff_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f16_nxv1f16:
+define <vscale x 1 x half> @intrinsic_vleff_mask_v_nxv1half_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1half_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vleff.mask.nxv1f16(
+  %a = call { <vscale x 1 x half>, i32 } @llvm.riscv.vleff.mask.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half>* %1,
     <vscale x 1 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 1 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x half>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 1 x half> %a
+  ret <vscale x 1 x half> %b
 }
 
-declare <vscale x 2 x half> @llvm.riscv.vleff.nxv2f16(
+declare { <vscale x 2 x half>, i32 } @llvm.riscv.vleff.nxv2f16(
   <vscale x 2 x half>*,
   i32);
 
-define <vscale x 2 x half> @intrinsic_vleff_v_nxv2f16_nxv2f16(<vscale x 2 x half>* %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv2f16_nxv2f16:
+define <vscale x 2 x half> @intrinsic_vleff_v_nxv2half_nxv2f16(<vscale x 2 x half>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv2half_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vleff.nxv2f16(
+  %a = call { <vscale x 2 x half>, i32 } @llvm.riscv.vleff.nxv2f16(
     <vscale x 2 x half>* %0,
     i32 %1)
-
-  ret <vscale x 2 x half> %a
+  %b = extractvalue { <vscale x 2 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x half>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 2 x half> %b
 }
 
-declare <vscale x 2 x half> @llvm.riscv.vleff.mask.nxv2f16(
+declare { <vscale x 2 x half>, i32 } @llvm.riscv.vleff.mask.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i1>,
   i32);
 
-define <vscale x 2 x half> @intrinsic_vleff_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half>* %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f16_nxv2f16:
+define <vscale x 2 x half> @intrinsic_vleff_mask_v_nxv2half_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half>* %1, <vscale x 2 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2half_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vleff.mask.nxv2f16(
+  %a = call { <vscale x 2 x half>, i32 } @llvm.riscv.vleff.mask.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half>* %1,
     <vscale x 2 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 2 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x half>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 2 x half> %a
+  ret <vscale x 2 x half> %b
 }
 
-declare <vscale x 4 x half> @llvm.riscv.vleff.nxv4f16(
+declare { <vscale x 4 x half>, i32 } @llvm.riscv.vleff.nxv4f16(
   <vscale x 4 x half>*,
   i32);
 
-define <vscale x 4 x half> @intrinsic_vleff_v_nxv4f16_nxv4f16(<vscale x 4 x half>* %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv4f16_nxv4f16:
+define <vscale x 4 x half> @intrinsic_vleff_v_nxv4half_nxv4f16(<vscale x 4 x half>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv4half_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vleff.nxv4f16(
+  %a = call { <vscale x 4 x half>, i32 } @llvm.riscv.vleff.nxv4f16(
     <vscale x 4 x half>* %0,
     i32 %1)
-
-  ret <vscale x 4 x half> %a
+  %b = extractvalue { <vscale x 4 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x half>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 4 x half> %b
 }
 
-declare <vscale x 4 x half> @llvm.riscv.vleff.mask.nxv4f16(
+declare { <vscale x 4 x half>, i32 } @llvm.riscv.vleff.mask.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i1>,
   i32);
 
-define <vscale x 4 x half> @intrinsic_vleff_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half>* %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f16_nxv4f16:
+define <vscale x 4 x half> @intrinsic_vleff_mask_v_nxv4half_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half>* %1, <vscale x 4 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4half_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vleff.mask.nxv4f16(
+  %a = call { <vscale x 4 x half>, i32 } @llvm.riscv.vleff.mask.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half>* %1,
     <vscale x 4 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 4 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x half>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 4 x half> %a
+  ret <vscale x 4 x half> %b
 }
 
-declare <vscale x 8 x half> @llvm.riscv.vleff.nxv8f16(
+declare { <vscale x 8 x half>, i32 } @llvm.riscv.vleff.nxv8f16(
   <vscale x 8 x half>*,
   i32);
 
-define <vscale x 8 x half> @intrinsic_vleff_v_nxv8f16_nxv8f16(<vscale x 8 x half>* %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv8f16_nxv8f16:
+define <vscale x 8 x half> @intrinsic_vleff_v_nxv8half_nxv8f16(<vscale x 8 x half>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv8half_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vleff.nxv8f16(
+  %a = call { <vscale x 8 x half>, i32 } @llvm.riscv.vleff.nxv8f16(
     <vscale x 8 x half>* %0,
     i32 %1)
-
-  ret <vscale x 8 x half> %a
+  %b = extractvalue { <vscale x 8 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x half>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 8 x half> %b
 }
 
-declare <vscale x 8 x half> @llvm.riscv.vleff.mask.nxv8f16(
+declare { <vscale x 8 x half>, i32 } @llvm.riscv.vleff.mask.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i1>,
   i32);
 
-define <vscale x 8 x half> @intrinsic_vleff_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half>* %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f16_nxv8f16:
+define <vscale x 8 x half> @intrinsic_vleff_mask_v_nxv8half_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half>* %1, <vscale x 8 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8half_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vleff.mask.nxv8f16(
+  %a = call { <vscale x 8 x half>, i32 } @llvm.riscv.vleff.mask.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half>* %1,
     <vscale x 8 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 8 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x half>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 8 x half> %a
+  ret <vscale x 8 x half> %b
 }
 
-declare <vscale x 16 x half> @llvm.riscv.vleff.nxv16f16(
+declare { <vscale x 16 x half>, i32 } @llvm.riscv.vleff.nxv16f16(
   <vscale x 16 x half>*,
   i32);
 
-define <vscale x 16 x half> @intrinsic_vleff_v_nxv16f16_nxv16f16(<vscale x 16 x half>* %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv16f16_nxv16f16:
+define <vscale x 16 x half> @intrinsic_vleff_v_nxv16half_nxv16f16(<vscale x 16 x half>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv16half_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vleff.nxv16f16(
+  %a = call { <vscale x 16 x half>, i32 } @llvm.riscv.vleff.nxv16f16(
     <vscale x 16 x half>* %0,
     i32 %1)
-
-  ret <vscale x 16 x half> %a
+  %b = extractvalue { <vscale x 16 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x half>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 16 x half> %b
 }
 
-declare <vscale x 16 x half> @llvm.riscv.vleff.mask.nxv16f16(
+declare { <vscale x 16 x half>, i32 } @llvm.riscv.vleff.mask.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i1>,
   i32);
 
-define <vscale x 16 x half> @intrinsic_vleff_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half>* %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f16_nxv16f16:
+define <vscale x 16 x half> @intrinsic_vleff_mask_v_nxv16half_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half>* %1, <vscale x 16 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16half_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vleff.mask.nxv16f16(
+  %a = call { <vscale x 16 x half>, i32 } @llvm.riscv.vleff.mask.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half>* %1,
     <vscale x 16 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 16 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x half>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 16 x half> %a
+  ret <vscale x 16 x half> %b
 }
 
-declare <vscale x 32 x half> @llvm.riscv.vleff.nxv32f16(
+declare { <vscale x 32 x half>, i32 } @llvm.riscv.vleff.nxv32f16(
   <vscale x 32 x half>*,
   i32);
 
-define <vscale x 32 x half> @intrinsic_vleff_v_nxv32f16_nxv32f16(<vscale x 32 x half>* %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv32f16_nxv32f16:
+define <vscale x 32 x half> @intrinsic_vleff_v_nxv32half_nxv32f16(<vscale x 32 x half>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv32half_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vleff.nxv32f16(
+  %a = call { <vscale x 32 x half>, i32 } @llvm.riscv.vleff.nxv32f16(
     <vscale x 32 x half>* %0,
     i32 %1)
-
-  ret <vscale x 32 x half> %a
+  %b = extractvalue { <vscale x 32 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 32 x half>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 32 x half> %b
 }
 
-declare <vscale x 32 x half> @llvm.riscv.vleff.mask.nxv32f16(
+declare { <vscale x 32 x half>, i32 } @llvm.riscv.vleff.mask.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i1>,
   i32);
 
-define <vscale x 32 x half> @intrinsic_vleff_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half>* %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32f16_nxv32f16:
+define <vscale x 32 x half> @intrinsic_vleff_mask_v_nxv32half_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half>* %1, <vscale x 32 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32half_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vleff.mask.nxv32f16(
+  %a = call { <vscale x 32 x half>, i32 } @llvm.riscv.vleff.mask.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half>* %1,
     <vscale x 32 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 32 x half>, i32 } %a, 0
+  %c = extractvalue { <vscale x 32 x half>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 32 x half> %a
+  ret <vscale x 32 x half> %b
 }
 
-declare <vscale x 1 x i8> @llvm.riscv.vleff.nxv1i8(
+declare { <vscale x 1 x i8>, i32 } @llvm.riscv.vleff.nxv1i8(
   <vscale x 1 x i8>*,
   i32);
 
-define <vscale x 1 x i8> @intrinsic_vleff_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0, i32 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vleff_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vleff.nxv1i8(
+  %a = call { <vscale x 1 x i8>, i32 } @llvm.riscv.vleff.nxv1i8(
     <vscale x 1 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 1 x i8> %a
+  %b = extractvalue { <vscale x 1 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 1 x i8> %b
 }
 
-declare <vscale x 1 x i8> @llvm.riscv.vleff.mask.nxv1i8(
+declare { <vscale x 1 x i8>, i32 } @llvm.riscv.vleff.mask.nxv1i8(
   <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i1>,
   i32);
 
-define <vscale x 1 x i8> @intrinsic_vleff_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i8> @intrinsic_vleff_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vleff.mask.nxv1i8(
+  %a = call { <vscale x 1 x i8>, i32 } @llvm.riscv.vleff.mask.nxv1i8(
     <vscale x 1 x i8> %0,
     <vscale x 1 x i8>* %1,
     <vscale x 1 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 1 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 1 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 1 x i8> %a
+  ret <vscale x 1 x i8> %b
 }
 
-declare <vscale x 2 x i8> @llvm.riscv.vleff.nxv2i8(
+declare { <vscale x 2 x i8>, i32 } @llvm.riscv.vleff.nxv2i8(
   <vscale x 2 x i8>*,
   i32);
 
-define <vscale x 2 x i8> @intrinsic_vleff_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0, i32 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vleff_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vleff.nxv2i8(
+  %a = call { <vscale x 2 x i8>, i32 } @llvm.riscv.vleff.nxv2i8(
     <vscale x 2 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 2 x i8> %a
+  %b = extractvalue { <vscale x 2 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 2 x i8> %b
 }
 
-declare <vscale x 2 x i8> @llvm.riscv.vleff.mask.nxv2i8(
+declare { <vscale x 2 x i8>, i32 } @llvm.riscv.vleff.mask.nxv2i8(
   <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i1>,
   i32);
 
-define <vscale x 2 x i8> @intrinsic_vleff_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8>* %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i8> @intrinsic_vleff_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8>* %1, <vscale x 2 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vleff.mask.nxv2i8(
+  %a = call { <vscale x 2 x i8>, i32 } @llvm.riscv.vleff.mask.nxv2i8(
     <vscale x 2 x i8> %0,
     <vscale x 2 x i8>* %1,
     <vscale x 2 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 2 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 2 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 2 x i8> %a
+  ret <vscale x 2 x i8> %b
 }
 
-declare <vscale x 4 x i8> @llvm.riscv.vleff.nxv4i8(
+declare { <vscale x 4 x i8>, i32 } @llvm.riscv.vleff.nxv4i8(
   <vscale x 4 x i8>*,
   i32);
 
-define <vscale x 4 x i8> @intrinsic_vleff_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0, i32 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vleff_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vleff.nxv4i8(
+  %a = call { <vscale x 4 x i8>, i32 } @llvm.riscv.vleff.nxv4i8(
     <vscale x 4 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 4 x i8> %a
+  %b = extractvalue { <vscale x 4 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 4 x i8> %b
 }
 
-declare <vscale x 4 x i8> @llvm.riscv.vleff.mask.nxv4i8(
+declare { <vscale x 4 x i8>, i32 } @llvm.riscv.vleff.mask.nxv4i8(
   <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i1>,
   i32);
 
-define <vscale x 4 x i8> @intrinsic_vleff_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8>* %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i8> @intrinsic_vleff_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8>* %1, <vscale x 4 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vleff.mask.nxv4i8(
+  %a = call { <vscale x 4 x i8>, i32 } @llvm.riscv.vleff.mask.nxv4i8(
     <vscale x 4 x i8> %0,
     <vscale x 4 x i8>* %1,
     <vscale x 4 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 4 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 4 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 4 x i8> %a
+  ret <vscale x 4 x i8> %b
 }
 
-declare <vscale x 8 x i8> @llvm.riscv.vleff.nxv8i8(
+declare { <vscale x 8 x i8>, i32 } @llvm.riscv.vleff.nxv8i8(
   <vscale x 8 x i8>*,
   i32);
 
-define <vscale x 8 x i8> @intrinsic_vleff_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0, i32 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vleff_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vleff.nxv8i8(
+  %a = call { <vscale x 8 x i8>, i32 } @llvm.riscv.vleff.nxv8i8(
     <vscale x 8 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 8 x i8> %a
+  %b = extractvalue { <vscale x 8 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 8 x i8> %b
 }
 
-declare <vscale x 8 x i8> @llvm.riscv.vleff.mask.nxv8i8(
+declare { <vscale x 8 x i8>, i32 } @llvm.riscv.vleff.mask.nxv8i8(
   <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i1>,
   i32);
 
-define <vscale x 8 x i8> @intrinsic_vleff_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8>* %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i8> @intrinsic_vleff_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8>* %1, <vscale x 8 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vleff.mask.nxv8i8(
+  %a = call { <vscale x 8 x i8>, i32 } @llvm.riscv.vleff.mask.nxv8i8(
     <vscale x 8 x i8> %0,
     <vscale x 8 x i8>* %1,
     <vscale x 8 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 8 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 8 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 8 x i8> %a
+  ret <vscale x 8 x i8> %b
 }
 
-declare <vscale x 16 x i8> @llvm.riscv.vleff.nxv16i8(
+declare { <vscale x 16 x i8>, i32 } @llvm.riscv.vleff.nxv16i8(
   <vscale x 16 x i8>*,
   i32);
 
-define <vscale x 16 x i8> @intrinsic_vleff_v_nxv16i8_nxv16i8(<vscale x 16 x i8>* %0, i32 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vleff_v_nxv16i8_nxv16i8(<vscale x 16 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vleff.nxv16i8(
+  %a = call { <vscale x 16 x i8>, i32 } @llvm.riscv.vleff.nxv16i8(
     <vscale x 16 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 16 x i8> %a
+  %b = extractvalue { <vscale x 16 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 16 x i8> %b
 }
 
-declare <vscale x 16 x i8> @llvm.riscv.vleff.mask.nxv16i8(
+declare { <vscale x 16 x i8>, i32 } @llvm.riscv.vleff.mask.nxv16i8(
   <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i1>,
   i32);
 
-define <vscale x 16 x i8> @intrinsic_vleff_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8>* %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i8> @intrinsic_vleff_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8>* %1, <vscale x 16 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vleff.mask.nxv16i8(
+  %a = call { <vscale x 16 x i8>, i32 } @llvm.riscv.vleff.mask.nxv16i8(
     <vscale x 16 x i8> %0,
     <vscale x 16 x i8>* %1,
     <vscale x 16 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 16 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 16 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 16 x i8> %a
+  ret <vscale x 16 x i8> %b
 }
 
-declare <vscale x 32 x i8> @llvm.riscv.vleff.nxv32i8(
+declare { <vscale x 32 x i8>, i32 } @llvm.riscv.vleff.nxv32i8(
   <vscale x 32 x i8>*,
   i32);
 
-define <vscale x 32 x i8> @intrinsic_vleff_v_nxv32i8_nxv32i8(<vscale x 32 x i8>* %0, i32 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vleff_v_nxv32i8_nxv32i8(<vscale x 32 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vleff.nxv32i8(
+  %a = call { <vscale x 32 x i8>, i32 } @llvm.riscv.vleff.nxv32i8(
     <vscale x 32 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 32 x i8> %a
+  %b = extractvalue { <vscale x 32 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 32 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 32 x i8> %b
 }
 
-declare <vscale x 32 x i8> @llvm.riscv.vleff.mask.nxv32i8(
+declare { <vscale x 32 x i8>, i32 } @llvm.riscv.vleff.mask.nxv32i8(
   <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i1>,
   i32);
 
-define <vscale x 32 x i8> @intrinsic_vleff_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8>* %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i8> @intrinsic_vleff_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8>* %1, <vscale x 32 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vleff.mask.nxv32i8(
+  %a = call { <vscale x 32 x i8>, i32 } @llvm.riscv.vleff.mask.nxv32i8(
     <vscale x 32 x i8> %0,
     <vscale x 32 x i8>* %1,
     <vscale x 32 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 32 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 32 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
 
-  ret <vscale x 32 x i8> %a
+  ret <vscale x 32 x i8> %b
 }
 
-declare <vscale x 64 x i8> @llvm.riscv.vleff.nxv64i8(
+declare { <vscale x 64 x i8>, i32 } @llvm.riscv.vleff.nxv64i8(
   <vscale x 64 x i8>*,
   i32);
 
-define <vscale x 64 x i8> @intrinsic_vleff_v_nxv64i8_nxv64i8(<vscale x 64 x i8>* %0, i32 %1) nounwind {
+define <vscale x 64 x i8> @intrinsic_vleff_v_nxv64i8_nxv64i8(<vscale x 64 x i8>* %0, i32 %1, i32* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv64i8_nxv64i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 64 x i8> @llvm.riscv.vleff.nxv64i8(
+  %a = call { <vscale x 64 x i8>, i32 } @llvm.riscv.vleff.nxv64i8(
     <vscale x 64 x i8>* %0,
     i32 %1)
-
-  ret <vscale x 64 x i8> %a
+  %b = extractvalue { <vscale x 64 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 64 x i8>, i32 } %a, 1
+  store i32 %c, i32* %2
+  ret <vscale x 64 x i8> %b
 }
 
-declare <vscale x 64 x i8> @llvm.riscv.vleff.mask.nxv64i8(
+declare { <vscale x 64 x i8>, i32 } @llvm.riscv.vleff.mask.nxv64i8(
   <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   <vscale x 64 x i1>,
   i32);
 
-define <vscale x 64 x i8> @intrinsic_vleff_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8>* %1, <vscale x 64 x i1> %2, i32 %3) nounwind {
+define <vscale x 64 x i8> @intrinsic_vleff_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8>* %1, <vscale x 64 x i1> %2, i32 %3, i32* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv64i8_nxv64i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m8,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 64 x i8> @llvm.riscv.vleff.mask.nxv64i8(
+  %a = call { <vscale x 64 x i8>, i32 } @llvm.riscv.vleff.mask.nxv64i8(
     <vscale x 64 x i8> %0,
     <vscale x 64 x i8>* %1,
     <vscale x 64 x i1> %2,
     i32 %3)
+  %b = extractvalue { <vscale x 64 x i8>, i32 } %a, 0
+  %c = extractvalue { <vscale x 64 x i8>, i32 } %a, 1
+  store i32 %c, i32* %4
+
+  ret <vscale x 64 x i8> %b
+}
+
+; Test with the VL output unused
+define <vscale x 1 x double> @intrinsic_vleff_dead_vl(<vscale x 1 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_dead_vl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i32 %1)
+  %b = extractvalue { <vscale x 1 x double>, i32 } %a, 0
+  ret <vscale x 1 x double> %b
+}
+
+define <vscale x 1 x double> @intrinsic_vleff_mask_dead_vl(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_dead_vl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+  %b = extractvalue { <vscale x 1 x double>, i32 } %a, 0
+
+  ret <vscale x 1 x double> %b
+}
+
+; Test with the loaded value unused
+define void @intrinsic_vleff_dead_value(<vscale x 1 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_dead_value:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v25, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i32 %1)
+  %b = extractvalue { <vscale x 1 x double>, i32 } %a, 1
+  store i32 %b, i32* %2
+  ret void
+}
+
+define void @intrinsic_vleff_mask_dead_value(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i32 %3, i32* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_dead_value:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+  %b = extractvalue { <vscale x 1 x double>, i32 } %a, 1
+  store i32 %b, i32* %4
+
+  ret void
+}
+
+; Test with both outputs dead. Make sure the vleff isn't deleted.
+define void @intrinsic_vleff_dead_all(<vscale x 1 x double>* %0, i32 %1, i32* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_dead_all:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v25, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i32 %1)
+  ret void
+}
+
+define void @intrinsic_vleff_mask_dead_all(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_dead_all:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
 
-  ret <vscale x 64 x i8> %a
+  ret void
 }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
index 458957a6f5c6..894d22af7f9d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
@@ -1,1482 +1,1915 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i64> @llvm.riscv.vleff.nxv1i64(
+declare { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64(
   <vscale x 1 x i64>*,
   i64);
 
-define <vscale x 1 x i64> @intrinsic_vleff_v_nxv1i64_nxv1i64(<vscale x 1 x i64>* %0, i64 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vleff_v_nxv1i64_nxv1i64(<vscale x 1 x i64>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vleff.nxv1i64(
+  %a = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64(
     <vscale x 1 x i64>* %0,
     i64 %1)
-
-  ret <vscale x 1 x i64> %a
+  %b = extractvalue { <vscale x 1 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i64>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x i64> %b
 }
 
-declare <vscale x 1 x i64> @llvm.riscv.vleff.mask.nxv1i64(
+declare { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.mask.nxv1i64(
   <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x i64> @intrinsic_vleff_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vleff_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vleff.mask.nxv1i64(
+  %a = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.mask.nxv1i64(
     <vscale x 1 x i64> %0,
     <vscale x 1 x i64>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i64>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x i64> %a
+  ret <vscale x 1 x i64> %b
 }
 
-declare <vscale x 2 x i64> @llvm.riscv.vleff.nxv2i64(
+declare { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64(
   <vscale x 2 x i64>*,
   i64);
 
-define <vscale x 2 x i64> @intrinsic_vleff_v_nxv2i64_nxv2i64(<vscale x 2 x i64>* %0, i64 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vleff_v_nxv2i64_nxv2i64(<vscale x 2 x i64>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vleff.nxv2i64(
+  %a = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64(
     <vscale x 2 x i64>* %0,
     i64 %1)
-
-  ret <vscale x 2 x i64> %a
+  %b = extractvalue { <vscale x 2 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i64>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x i64> %b
 }
 
-declare <vscale x 2 x i64> @llvm.riscv.vleff.mask.nxv2i64(
+declare { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.mask.nxv2i64(
   <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x i64> @intrinsic_vleff_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vleff_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vleff.mask.nxv2i64(
+  %a = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.mask.nxv2i64(
     <vscale x 2 x i64> %0,
     <vscale x 2 x i64>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i64>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x i64> %a
+  ret <vscale x 2 x i64> %b
 }
 
-declare <vscale x 4 x i64> @llvm.riscv.vleff.nxv4i64(
+declare { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64(
   <vscale x 4 x i64>*,
   i64);
 
-define <vscale x 4 x i64> @intrinsic_vleff_v_nxv4i64_nxv4i64(<vscale x 4 x i64>* %0, i64 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vleff_v_nxv4i64_nxv4i64(<vscale x 4 x i64>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vleff.nxv4i64(
+  %a = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64(
     <vscale x 4 x i64>* %0,
     i64 %1)
-
-  ret <vscale x 4 x i64> %a
+  %b = extractvalue { <vscale x 4 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i64>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x i64> %b
 }
 
-declare <vscale x 4 x i64> @llvm.riscv.vleff.mask.nxv4i64(
+declare { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.mask.nxv4i64(
   <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x i64> @intrinsic_vleff_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vleff_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vleff.mask.nxv4i64(
+  %a = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.mask.nxv4i64(
     <vscale x 4 x i64> %0,
     <vscale x 4 x i64>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i64>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x i64> %a
+  ret <vscale x 4 x i64> %b
 }
 
-declare <vscale x 8 x i64> @llvm.riscv.vleff.nxv8i64(
+declare { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64(
   <vscale x 8 x i64>*,
   i64);
 
-define <vscale x 8 x i64> @intrinsic_vleff_v_nxv8i64_nxv8i64(<vscale x 8 x i64>* %0, i64 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vleff_v_nxv8i64_nxv8i64(<vscale x 8 x i64>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vleff.nxv8i64(
+  %a = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64(
     <vscale x 8 x i64>* %0,
     i64 %1)
-
-  ret <vscale x 8 x i64> %a
+  %b = extractvalue { <vscale x 8 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i64>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x i64> %b
 }
 
-declare <vscale x 8 x i64> @llvm.riscv.vleff.mask.nxv8i64(
+declare { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.mask.nxv8i64(
   <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x i64> @intrinsic_vleff_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vleff_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vleff.mask.nxv8i64(
+  %a = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.mask.nxv8i64(
     <vscale x 8 x i64> %0,
     <vscale x 8 x i64>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x i64>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i64>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x i64> %a
+  ret <vscale x 8 x i64> %b
 }
 
-declare <vscale x 1 x double> @llvm.riscv.vleff.nxv1f64(
+declare { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
   <vscale x 1 x double>*,
   i64);
 
-define <vscale x 1 x double> @intrinsic_vleff_v_nxv1f64_nxv1f64(<vscale x 1 x double>* %0, i64 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vleff_v_nxv1f64_nxv1f64(<vscale x 1 x double>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vleff.nxv1f64(
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
     <vscale x 1 x double>* %0,
     i64 %1)
-
-  ret <vscale x 1 x double> %a
+  %b = extractvalue { <vscale x 1 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x double>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x double> %b
 }
 
-declare <vscale x 1 x double> @llvm.riscv.vleff.mask.nxv1f64(
+declare { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.mask.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x double> @intrinsic_vleff_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vleff_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vleff.mask.nxv1f64(
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.mask.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x double>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x double> %a
+  ret <vscale x 1 x double> %b
 }
 
-declare <vscale x 2 x double> @llvm.riscv.vleff.nxv2f64(
+declare { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.nxv2f64(
   <vscale x 2 x double>*,
   i64);
 
-define <vscale x 2 x double> @intrinsic_vleff_v_nxv2f64_nxv2f64(<vscale x 2 x double>* %0, i64 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vleff_v_nxv2f64_nxv2f64(<vscale x 2 x double>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vleff.nxv2f64(
+  %a = call { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.nxv2f64(
     <vscale x 2 x double>* %0,
     i64 %1)
-
-  ret <vscale x 2 x double> %a
+  %b = extractvalue { <vscale x 2 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x double>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x double> %b
 }
 
-declare <vscale x 2 x double> @llvm.riscv.vleff.mask.nxv2f64(
+declare { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.mask.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x double> @intrinsic_vleff_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vleff_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vleff.mask.nxv2f64(
+  %a = call { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.mask.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x double>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x double> %a
+  ret <vscale x 2 x double> %b
 }
 
-declare <vscale x 4 x double> @llvm.riscv.vleff.nxv4f64(
+declare { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.nxv4f64(
   <vscale x 4 x double>*,
   i64);
 
-define <vscale x 4 x double> @intrinsic_vleff_v_nxv4f64_nxv4f64(<vscale x 4 x double>* %0, i64 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vleff_v_nxv4f64_nxv4f64(<vscale x 4 x double>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vleff.nxv4f64(
+  %a = call { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.nxv4f64(
     <vscale x 4 x double>* %0,
     i64 %1)
-
-  ret <vscale x 4 x double> %a
+  %b = extractvalue { <vscale x 4 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x double>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x double> %b
 }
 
-declare <vscale x 4 x double> @llvm.riscv.vleff.mask.nxv4f64(
+declare { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.mask.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x double> @intrinsic_vleff_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vleff_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vleff.mask.nxv4f64(
+  %a = call { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.mask.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x double>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x double> %a
+  ret <vscale x 4 x double> %b
 }
 
-declare <vscale x 8 x double> @llvm.riscv.vleff.nxv8f64(
+declare { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.nxv8f64(
   <vscale x 8 x double>*,
   i64);
 
-define <vscale x 8 x double> @intrinsic_vleff_v_nxv8f64_nxv8f64(<vscale x 8 x double>* %0, i64 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vleff_v_nxv8f64_nxv8f64(<vscale x 8 x double>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vleff.nxv8f64(
+  %a = call { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.nxv8f64(
     <vscale x 8 x double>* %0,
     i64 %1)
-
-  ret <vscale x 8 x double> %a
+  %b = extractvalue { <vscale x 8 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x double>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x double> %b
 }
 
-declare <vscale x 8 x double> @llvm.riscv.vleff.mask.nxv8f64(
+declare { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.mask.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x double> @intrinsic_vleff_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vleff_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,tu,mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vleff.mask.nxv8f64(
+  %a = call { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.mask.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x double>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x double>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x double> %a
+  ret <vscale x 8 x double> %b
 }
 
-declare <vscale x 1 x i32> @llvm.riscv.vleff.nxv1i32(
+declare { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32(
   <vscale x 1 x i32>*,
   i64);
 
-define <vscale x 1 x i32> @intrinsic_vleff_v_nxv1i32_nxv1i32(<vscale x 1 x i32>* %0, i64 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vleff_v_nxv1i32_nxv1i32(<vscale x 1 x i32>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vleff.nxv1i32(
+  %a = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32(
     <vscale x 1 x i32>* %0,
     i64 %1)
-
-  ret <vscale x 1 x i32> %a
+  %b = extractvalue { <vscale x 1 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i32>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x i32> %b
 }
 
-declare <vscale x 1 x i32> @llvm.riscv.vleff.mask.nxv1i32(
+declare { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.mask.nxv1i32(
   <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x i32> @intrinsic_vleff_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vleff_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vleff.mask.nxv1i32(
+  %a = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.mask.nxv1i32(
     <vscale x 1 x i32> %0,
     <vscale x 1 x i32>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i32>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x i32> %a
+  ret <vscale x 1 x i32> %b
 }
 
-declare <vscale x 2 x i32> @llvm.riscv.vleff.nxv2i32(
+declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(
   <vscale x 2 x i32>*,
   i64);
 
-define <vscale x 2 x i32> @intrinsic_vleff_v_nxv2i32_nxv2i32(<vscale x 2 x i32>* %0, i64 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vleff_v_nxv2i32_nxv2i32(<vscale x 2 x i32>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vleff.nxv2i32(
+  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(
     <vscale x 2 x i32>* %0,
     i64 %1)
-
-  ret <vscale x 2 x i32> %a
+  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i32>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x i32> %b
 }
 
-declare <vscale x 2 x i32> @llvm.riscv.vleff.mask.nxv2i32(
+declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.mask.nxv2i32(
   <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x i32> @intrinsic_vleff_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vleff_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vleff.mask.nxv2i32(
+  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.mask.nxv2i32(
     <vscale x 2 x i32> %0,
     <vscale x 2 x i32>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i32>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x i32> %a
+  ret <vscale x 2 x i32> %b
 }
 
-declare <vscale x 4 x i32> @llvm.riscv.vleff.nxv4i32(
+declare { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32(
   <vscale x 4 x i32>*,
   i64);
 
-define <vscale x 4 x i32> @intrinsic_vleff_v_nxv4i32_nxv4i32(<vscale x 4 x i32>* %0, i64 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vleff_v_nxv4i32_nxv4i32(<vscale x 4 x i32>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vleff.nxv4i32(
+  %a = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32(
     <vscale x 4 x i32>* %0,
     i64 %1)
-
-  ret <vscale x 4 x i32> %a
+  %b = extractvalue { <vscale x 4 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i32>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x i32> %b
 }
 
-declare <vscale x 4 x i32> @llvm.riscv.vleff.mask.nxv4i32(
+declare { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.mask.nxv4i32(
   <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x i32> @intrinsic_vleff_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vleff_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vleff.mask.nxv4i32(
+  %a = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.mask.nxv4i32(
     <vscale x 4 x i32> %0,
     <vscale x 4 x i32>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i32>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x i32> %a
+  ret <vscale x 4 x i32> %b
 }
 
-declare <vscale x 8 x i32> @llvm.riscv.vleff.nxv8i32(
+declare { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32(
   <vscale x 8 x i32>*,
   i64);
 
-define <vscale x 8 x i32> @intrinsic_vleff_v_nxv8i32_nxv8i32(<vscale x 8 x i32>* %0, i64 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vleff_v_nxv8i32_nxv8i32(<vscale x 8 x i32>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vleff.nxv8i32(
+  %a = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32(
     <vscale x 8 x i32>* %0,
     i64 %1)
-
-  ret <vscale x 8 x i32> %a
+  %b = extractvalue { <vscale x 8 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i32>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x i32> %b
 }
 
-declare <vscale x 8 x i32> @llvm.riscv.vleff.mask.nxv8i32(
+declare { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.mask.nxv8i32(
   <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x i32> @intrinsic_vleff_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vleff_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vleff.mask.nxv8i32(
+  %a = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.mask.nxv8i32(
     <vscale x 8 x i32> %0,
     <vscale x 8 x i32>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i32>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x i32> %a
+  ret <vscale x 8 x i32> %b
 }
 
-declare <vscale x 16 x i32> @llvm.riscv.vleff.nxv16i32(
+declare { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32(
   <vscale x 16 x i32>*,
   i64);
 
-define <vscale x 16 x i32> @intrinsic_vleff_v_nxv16i32_nxv16i32(<vscale x 16 x i32>* %0, i64 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vleff_v_nxv16i32_nxv16i32(<vscale x 16 x i32>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16i32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vleff.nxv16i32(
+  %a = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32(
     <vscale x 16 x i32>* %0,
     i64 %1)
-
-  ret <vscale x 16 x i32> %a
+  %b = extractvalue { <vscale x 16 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x i32>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 16 x i32> %b
 }
 
-declare <vscale x 16 x i32> @llvm.riscv.vleff.mask.nxv16i32(
+declare { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.mask.nxv16i32(
   <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i1>,
   i64);
 
-define <vscale x 16 x i32> @intrinsic_vleff_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32>* %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vleff_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32>* %1, <vscale x 16 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vleff.mask.nxv16i32(
+  %a = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.mask.nxv16i32(
     <vscale x 16 x i32> %0,
     <vscale x 16 x i32>* %1,
     <vscale x 16 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 16 x i32>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x i32>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 16 x i32> %a
+  ret <vscale x 16 x i32> %b
 }
 
-declare <vscale x 1 x float> @llvm.riscv.vleff.nxv1f32(
+declare { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.nxv1f32(
   <vscale x 1 x float>*,
   i64);
 
-define <vscale x 1 x float> @intrinsic_vleff_v_nxv1f32_nxv1f32(<vscale x 1 x float>* %0, i64 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vleff_v_nxv1f32_nxv1f32(<vscale x 1 x float>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vleff.nxv1f32(
+  %a = call { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.nxv1f32(
     <vscale x 1 x float>* %0,
     i64 %1)
-
-  ret <vscale x 1 x float> %a
+  %b = extractvalue { <vscale x 1 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x float>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x float> %b
 }
 
-declare <vscale x 1 x float> @llvm.riscv.vleff.mask.nxv1f32(
+declare { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.mask.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x float> @intrinsic_vleff_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vleff_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vleff.mask.nxv1f32(
+  %a = call { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.mask.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x float>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x float> %a
+  ret <vscale x 1 x float> %b
 }
 
-declare <vscale x 2 x float> @llvm.riscv.vleff.nxv2f32(
+declare { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.nxv2f32(
   <vscale x 2 x float>*,
   i64);
 
-define <vscale x 2 x float> @intrinsic_vleff_v_nxv2f32_nxv2f32(<vscale x 2 x float>* %0, i64 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vleff_v_nxv2f32_nxv2f32(<vscale x 2 x float>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vleff.nxv2f32(
+  %a = call { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.nxv2f32(
     <vscale x 2 x float>* %0,
     i64 %1)
-
-  ret <vscale x 2 x float> %a
+  %b = extractvalue { <vscale x 2 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x float>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x float> %b
 }
 
-declare <vscale x 2 x float> @llvm.riscv.vleff.mask.nxv2f32(
+declare { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.mask.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x float> @intrinsic_vleff_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vleff_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vleff.mask.nxv2f32(
+  %a = call { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.mask.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x float>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x float> %a
+  ret <vscale x 2 x float> %b
 }
 
-declare <vscale x 4 x float> @llvm.riscv.vleff.nxv4f32(
+declare { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.nxv4f32(
   <vscale x 4 x float>*,
   i64);
 
-define <vscale x 4 x float> @intrinsic_vleff_v_nxv4f32_nxv4f32(<vscale x 4 x float>* %0, i64 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vleff_v_nxv4f32_nxv4f32(<vscale x 4 x float>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vleff.nxv4f32(
+  %a = call { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.nxv4f32(
     <vscale x 4 x float>* %0,
     i64 %1)
-
-  ret <vscale x 4 x float> %a
+  %b = extractvalue { <vscale x 4 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x float>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x float> %b
 }
 
-declare <vscale x 4 x float> @llvm.riscv.vleff.mask.nxv4f32(
+declare { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.mask.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x float> @intrinsic_vleff_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vleff_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vleff.mask.nxv4f32(
+  %a = call { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.mask.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x float>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x float> %a
+  ret <vscale x 4 x float> %b
 }
 
-declare <vscale x 8 x float> @llvm.riscv.vleff.nxv8f32(
+declare { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.nxv8f32(
   <vscale x 8 x float>*,
   i64);
 
-define <vscale x 8 x float> @intrinsic_vleff_v_nxv8f32_nxv8f32(<vscale x 8 x float>* %0, i64 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vleff_v_nxv8f32_nxv8f32(<vscale x 8 x float>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vleff.nxv8f32(
+  %a = call { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.nxv8f32(
     <vscale x 8 x float>* %0,
     i64 %1)
-
-  ret <vscale x 8 x float> %a
+  %b = extractvalue { <vscale x 8 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x float>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x float> %b
 }
 
-declare <vscale x 8 x float> @llvm.riscv.vleff.mask.nxv8f32(
+declare { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.mask.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x float> @intrinsic_vleff_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vleff_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vleff.mask.nxv8f32(
+  %a = call { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.mask.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x float>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x float> %a
+  ret <vscale x 8 x float> %b
 }
 
-declare <vscale x 16 x float> @llvm.riscv.vleff.nxv16f32(
+declare { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.nxv16f32(
   <vscale x 16 x float>*,
   i64);
 
-define <vscale x 16 x float> @intrinsic_vleff_v_nxv16f32_nxv16f32(<vscale x 16 x float>* %0, i64 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vleff_v_nxv16f32_nxv16f32(<vscale x 16 x float>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vleff.nxv16f32(
+  %a = call { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.nxv16f32(
     <vscale x 16 x float>* %0,
     i64 %1)
-
-  ret <vscale x 16 x float> %a
+  %b = extractvalue { <vscale x 16 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x float>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 16 x float> %b
 }
 
-declare <vscale x 16 x float> @llvm.riscv.vleff.mask.nxv16f32(
+declare { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.mask.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i1>,
   i64);
 
-define <vscale x 16 x float> @intrinsic_vleff_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float>* %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vleff_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float>* %1, <vscale x 16 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,tu,mu
 ; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vleff.mask.nxv16f32(
+  %a = call { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.mask.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float>* %1,
     <vscale x 16 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 16 x float>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x float>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 16 x float> %a
+  ret <vscale x 16 x float> %b
 }
 
-declare <vscale x 1 x i16> @llvm.riscv.vleff.nxv1i16(
+declare { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16(
   <vscale x 1 x i16>*,
   i64);
 
-define <vscale x 1 x i16> @intrinsic_vleff_v_nxv1i16_nxv1i16(<vscale x 1 x i16>* %0, i64 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vleff_v_nxv1i16_nxv1i16(<vscale x 1 x i16>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vleff.nxv1i16(
+  %a = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16(
     <vscale x 1 x i16>* %0,
     i64 %1)
-
-  ret <vscale x 1 x i16> %a
+  %b = extractvalue { <vscale x 1 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i16>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x i16> %b
 }
 
-declare <vscale x 1 x i16> @llvm.riscv.vleff.mask.nxv1i16(
+declare { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.mask.nxv1i16(
   <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x i16> @intrinsic_vleff_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vleff_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vleff.mask.nxv1i16(
+  %a = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.mask.nxv1i16(
     <vscale x 1 x i16> %0,
     <vscale x 1 x i16>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i16>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x i16> %a
+  ret <vscale x 1 x i16> %b
 }
 
-declare <vscale x 2 x i16> @llvm.riscv.vleff.nxv2i16(
+declare { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16(
   <vscale x 2 x i16>*,
   i64);
 
-define <vscale x 2 x i16> @intrinsic_vleff_v_nxv2i16_nxv2i16(<vscale x 2 x i16>* %0, i64 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vleff_v_nxv2i16_nxv2i16(<vscale x 2 x i16>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vleff.nxv2i16(
+  %a = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16(
     <vscale x 2 x i16>* %0,
     i64 %1)
-
-  ret <vscale x 2 x i16> %a
+  %b = extractvalue { <vscale x 2 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i16>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x i16> %b
 }
 
-declare <vscale x 2 x i16> @llvm.riscv.vleff.mask.nxv2i16(
+declare { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.mask.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x i16> @intrinsic_vleff_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vleff_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vleff.mask.nxv2i16(
+  %a = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.mask.nxv2i16(
     <vscale x 2 x i16> %0,
     <vscale x 2 x i16>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i16>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x i16> %a
+  ret <vscale x 2 x i16> %b
 }
 
-declare <vscale x 4 x i16> @llvm.riscv.vleff.nxv4i16(
+declare { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16(
   <vscale x 4 x i16>*,
   i64);
 
-define <vscale x 4 x i16> @intrinsic_vleff_v_nxv4i16_nxv4i16(<vscale x 4 x i16>* %0, i64 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vleff_v_nxv4i16_nxv4i16(<vscale x 4 x i16>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vleff.nxv4i16(
+  %a = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16(
     <vscale x 4 x i16>* %0,
     i64 %1)
-
-  ret <vscale x 4 x i16> %a
+  %b = extractvalue { <vscale x 4 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i16>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x i16> %b
 }
 
-declare <vscale x 4 x i16> @llvm.riscv.vleff.mask.nxv4i16(
+declare { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.mask.nxv4i16(
   <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x i16> @intrinsic_vleff_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vleff_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vleff.mask.nxv4i16(
+  %a = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.mask.nxv4i16(
     <vscale x 4 x i16> %0,
     <vscale x 4 x i16>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i16>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x i16> %a
+  ret <vscale x 4 x i16> %b
 }
 
-declare <vscale x 8 x i16> @llvm.riscv.vleff.nxv8i16(
+declare { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16(
   <vscale x 8 x i16>*,
   i64);
 
-define <vscale x 8 x i16> @intrinsic_vleff_v_nxv8i16_nxv8i16(<vscale x 8 x i16>* %0, i64 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vleff_v_nxv8i16_nxv8i16(<vscale x 8 x i16>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vleff.nxv8i16(
+  %a = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16(
     <vscale x 8 x i16>* %0,
     i64 %1)
-
-  ret <vscale x 8 x i16> %a
+  %b = extractvalue { <vscale x 8 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i16>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x i16> %b
 }
 
-declare <vscale x 8 x i16> @llvm.riscv.vleff.mask.nxv8i16(
+declare { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.mask.nxv8i16(
   <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x i16> @intrinsic_vleff_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vleff_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vleff.mask.nxv8i16(
+  %a = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.mask.nxv8i16(
     <vscale x 8 x i16> %0,
     <vscale x 8 x i16>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i16>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x i16> %a
+  ret <vscale x 8 x i16> %b
 }
 
-declare <vscale x 16 x i16> @llvm.riscv.vleff.nxv16i16(
+declare { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16(
   <vscale x 16 x i16>*,
   i64);
 
-define <vscale x 16 x i16> @intrinsic_vleff_v_nxv16i16_nxv16i16(<vscale x 16 x i16>* %0, i64 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vleff_v_nxv16i16_nxv16i16(<vscale x 16 x i16>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vleff.nxv16i16(
+  %a = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16(
     <vscale x 16 x i16>* %0,
     i64 %1)
-
-  ret <vscale x 16 x i16> %a
+  %b = extractvalue { <vscale x 16 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x i16>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 16 x i16> %b
 }
 
-declare <vscale x 16 x i16> @llvm.riscv.vleff.mask.nxv16i16(
+declare { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.mask.nxv16i16(
   <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i1>,
   i64);
 
-define <vscale x 16 x i16> @intrinsic_vleff_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16>* %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vleff_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16>* %1, <vscale x 16 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vleff.mask.nxv16i16(
+  %a = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.mask.nxv16i16(
     <vscale x 16 x i16> %0,
     <vscale x 16 x i16>* %1,
     <vscale x 16 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 16 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x i16>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 16 x i16> %a
+  ret <vscale x 16 x i16> %b
 }
 
-declare <vscale x 32 x i16> @llvm.riscv.vleff.nxv32i16(
+declare { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16(
   <vscale x 32 x i16>*,
   i64);
 
-define <vscale x 32 x i16> @intrinsic_vleff_v_nxv32i16_nxv32i16(<vscale x 32 x i16>* %0, i64 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vleff_v_nxv32i16_nxv32i16(<vscale x 32 x i16>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv32i16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vleff.nxv32i16(
+  %a = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16(
     <vscale x 32 x i16>* %0,
     i64 %1)
-
-  ret <vscale x 32 x i16> %a
+  %b = extractvalue { <vscale x 32 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 32 x i16>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 32 x i16> %b
 }
 
-declare <vscale x 32 x i16> @llvm.riscv.vleff.mask.nxv32i16(
+declare { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.mask.nxv32i16(
   <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i1>,
   i64);
 
-define <vscale x 32 x i16> @intrinsic_vleff_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16>* %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+define <vscale x 32 x i16> @intrinsic_vleff_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16>* %1, <vscale x 32 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vleff.mask.nxv32i16(
+  %a = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.mask.nxv32i16(
     <vscale x 32 x i16> %0,
     <vscale x 32 x i16>* %1,
     <vscale x 32 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 32 x i16>, i64 } %a, 0
+  %c = extractvalue { <vscale x 32 x i16>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 32 x i16> %a
+  ret <vscale x 32 x i16> %b
 }
 
-declare <vscale x 1 x half> @llvm.riscv.vleff.nxv1f16(
+declare { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.nxv1f16(
   <vscale x 1 x half>*,
   i64);
 
-define <vscale x 1 x half> @intrinsic_vleff_v_nxv1f16_nxv1f16(<vscale x 1 x half>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv1f16_nxv1f16:
+define <vscale x 1 x half> @intrinsic_vleff_v_nxv1half_nxv1f16(<vscale x 1 x half>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv1half_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vleff.nxv1f16(
+  %a = call { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.nxv1f16(
     <vscale x 1 x half>* %0,
     i64 %1)
-
-  ret <vscale x 1 x half> %a
+  %b = extractvalue { <vscale x 1 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x half>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x half> %b
 }
 
-declare <vscale x 1 x half> @llvm.riscv.vleff.mask.nxv1f16(
+declare { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.mask.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x half> @intrinsic_vleff_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f16_nxv1f16:
+define <vscale x 1 x half> @intrinsic_vleff_mask_v_nxv1half_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1half_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vleff.mask.nxv1f16(
+  %a = call { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.mask.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x half>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x half> %a
+  ret <vscale x 1 x half> %b
 }
 
-declare <vscale x 2 x half> @llvm.riscv.vleff.nxv2f16(
+declare { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.nxv2f16(
   <vscale x 2 x half>*,
   i64);
 
-define <vscale x 2 x half> @intrinsic_vleff_v_nxv2f16_nxv2f16(<vscale x 2 x half>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv2f16_nxv2f16:
+define <vscale x 2 x half> @intrinsic_vleff_v_nxv2half_nxv2f16(<vscale x 2 x half>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv2half_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vleff.nxv2f16(
+  %a = call { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.nxv2f16(
     <vscale x 2 x half>* %0,
     i64 %1)
-
-  ret <vscale x 2 x half> %a
+  %b = extractvalue { <vscale x 2 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x half>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x half> %b
 }
 
-declare <vscale x 2 x half> @llvm.riscv.vleff.mask.nxv2f16(
+declare { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.mask.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x half> @intrinsic_vleff_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f16_nxv2f16:
+define <vscale x 2 x half> @intrinsic_vleff_mask_v_nxv2half_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2half_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vleff.mask.nxv2f16(
+  %a = call { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.mask.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x half>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x half> %a
+  ret <vscale x 2 x half> %b
 }
 
-declare <vscale x 4 x half> @llvm.riscv.vleff.nxv4f16(
+declare { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.nxv4f16(
   <vscale x 4 x half>*,
   i64);
 
-define <vscale x 4 x half> @intrinsic_vleff_v_nxv4f16_nxv4f16(<vscale x 4 x half>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv4f16_nxv4f16:
+define <vscale x 4 x half> @intrinsic_vleff_v_nxv4half_nxv4f16(<vscale x 4 x half>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv4half_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vleff.nxv4f16(
+  %a = call { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.nxv4f16(
     <vscale x 4 x half>* %0,
     i64 %1)
-
-  ret <vscale x 4 x half> %a
+  %b = extractvalue { <vscale x 4 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x half>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x half> %b
 }
 
-declare <vscale x 4 x half> @llvm.riscv.vleff.mask.nxv4f16(
+declare { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.mask.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x half> @intrinsic_vleff_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f16_nxv4f16:
+define <vscale x 4 x half> @intrinsic_vleff_mask_v_nxv4half_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4half_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vleff.mask.nxv4f16(
+  %a = call { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.mask.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x half>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x half> %a
+  ret <vscale x 4 x half> %b
 }
 
-declare <vscale x 8 x half> @llvm.riscv.vleff.nxv8f16(
+declare { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.nxv8f16(
   <vscale x 8 x half>*,
   i64);
 
-define <vscale x 8 x half> @intrinsic_vleff_v_nxv8f16_nxv8f16(<vscale x 8 x half>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv8f16_nxv8f16:
+define <vscale x 8 x half> @intrinsic_vleff_v_nxv8half_nxv8f16(<vscale x 8 x half>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv8half_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vleff.nxv8f16(
+  %a = call { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.nxv8f16(
     <vscale x 8 x half>* %0,
     i64 %1)
-
-  ret <vscale x 8 x half> %a
+  %b = extractvalue { <vscale x 8 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x half>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x half> %b
 }
 
-declare <vscale x 8 x half> @llvm.riscv.vleff.mask.nxv8f16(
+declare { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.mask.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x half> @intrinsic_vleff_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f16_nxv8f16:
+define <vscale x 8 x half> @intrinsic_vleff_mask_v_nxv8half_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8half_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vleff.mask.nxv8f16(
+  %a = call { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.mask.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x half>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x half> %a
+  ret <vscale x 8 x half> %b
 }
 
-declare <vscale x 16 x half> @llvm.riscv.vleff.nxv16f16(
+declare { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.nxv16f16(
   <vscale x 16 x half>*,
   i64);
 
-define <vscale x 16 x half> @intrinsic_vleff_v_nxv16f16_nxv16f16(<vscale x 16 x half>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv16f16_nxv16f16:
+define <vscale x 16 x half> @intrinsic_vleff_v_nxv16half_nxv16f16(<vscale x 16 x half>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv16half_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vleff.nxv16f16(
+  %a = call { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.nxv16f16(
     <vscale x 16 x half>* %0,
     i64 %1)
-
-  ret <vscale x 16 x half> %a
+  %b = extractvalue { <vscale x 16 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x half>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 16 x half> %b
 }
 
-declare <vscale x 16 x half> @llvm.riscv.vleff.mask.nxv16f16(
+declare { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.mask.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i1>,
   i64);
 
-define <vscale x 16 x half> @intrinsic_vleff_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half>* %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f16_nxv16f16:
+define <vscale x 16 x half> @intrinsic_vleff_mask_v_nxv16half_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half>* %1, <vscale x 16 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16half_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vleff.mask.nxv16f16(
+  %a = call { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.mask.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half>* %1,
     <vscale x 16 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 16 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x half>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 16 x half> %a
+  ret <vscale x 16 x half> %b
 }
 
-declare <vscale x 32 x half> @llvm.riscv.vleff.nxv32f16(
+declare { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.nxv32f16(
   <vscale x 32 x half>*,
   i64);
 
-define <vscale x 32 x half> @intrinsic_vleff_v_nxv32f16_nxv32f16(<vscale x 32 x half>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vleff_v_nxv32f16_nxv32f16:
+define <vscale x 32 x half> @intrinsic_vleff_v_nxv32half_nxv32f16(<vscale x 32 x half>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_v_nxv32half_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vleff.nxv32f16(
+  %a = call { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.nxv32f16(
     <vscale x 32 x half>* %0,
     i64 %1)
-
-  ret <vscale x 32 x half> %a
+  %b = extractvalue { <vscale x 32 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 32 x half>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 32 x half> %b
 }
 
-declare <vscale x 32 x half> @llvm.riscv.vleff.mask.nxv32f16(
+declare { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.mask.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i1>,
   i64);
 
-define <vscale x 32 x half> @intrinsic_vleff_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half>* %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32f16_nxv32f16:
+define <vscale x 32 x half> @intrinsic_vleff_mask_v_nxv32half_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half>* %1, <vscale x 32 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32half_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,tu,mu
 ; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vleff.mask.nxv32f16(
+  %a = call { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.mask.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half>* %1,
     <vscale x 32 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 32 x half>, i64 } %a, 0
+  %c = extractvalue { <vscale x 32 x half>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 32 x half> %a
+  ret <vscale x 32 x half> %b
 }
 
-declare <vscale x 1 x i8> @llvm.riscv.vleff.nxv1i8(
+declare { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8(
   <vscale x 1 x i8>*,
   i64);
 
-define <vscale x 1 x i8> @intrinsic_vleff_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0, i64 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vleff_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vleff.nxv1i8(
+  %a = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8(
     <vscale x 1 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 1 x i8> %a
+  %b = extractvalue { <vscale x 1 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 1 x i8> %b
 }
 
-declare <vscale x 1 x i8> @llvm.riscv.vleff.mask.nxv1i8(
+declare { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.mask.nxv1i8(
   <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i1>,
   i64);
 
-define <vscale x 1 x i8> @intrinsic_vleff_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i8> @intrinsic_vleff_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vleff.mask.nxv1i8(
+  %a = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.mask.nxv1i8(
     <vscale x 1 x i8> %0,
     <vscale x 1 x i8>* %1,
     <vscale x 1 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 1 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 1 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 1 x i8> %a
+  ret <vscale x 1 x i8> %b
 }
 
-declare <vscale x 2 x i8> @llvm.riscv.vleff.nxv2i8(
+declare { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8(
   <vscale x 2 x i8>*,
   i64);
 
-define <vscale x 2 x i8> @intrinsic_vleff_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0, i64 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vleff_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vleff.nxv2i8(
+  %a = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8(
     <vscale x 2 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 2 x i8> %a
+  %b = extractvalue { <vscale x 2 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 2 x i8> %b
 }
 
-declare <vscale x 2 x i8> @llvm.riscv.vleff.mask.nxv2i8(
+declare { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.mask.nxv2i8(
   <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i1>,
   i64);
 
-define <vscale x 2 x i8> @intrinsic_vleff_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8>* %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i8> @intrinsic_vleff_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8>* %1, <vscale x 2 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vleff.mask.nxv2i8(
+  %a = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.mask.nxv2i8(
     <vscale x 2 x i8> %0,
     <vscale x 2 x i8>* %1,
     <vscale x 2 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 2 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 2 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 2 x i8> %a
+  ret <vscale x 2 x i8> %b
 }
 
-declare <vscale x 4 x i8> @llvm.riscv.vleff.nxv4i8(
+declare { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8(
   <vscale x 4 x i8>*,
   i64);
 
-define <vscale x 4 x i8> @intrinsic_vleff_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0, i64 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vleff_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vleff.nxv4i8(
+  %a = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8(
     <vscale x 4 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 4 x i8> %a
+  %b = extractvalue { <vscale x 4 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 4 x i8> %b
 }
 
-declare <vscale x 4 x i8> @llvm.riscv.vleff.mask.nxv4i8(
+declare { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.mask.nxv4i8(
   <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i1>,
   i64);
 
-define <vscale x 4 x i8> @intrinsic_vleff_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8>* %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i8> @intrinsic_vleff_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8>* %1, <vscale x 4 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vleff.mask.nxv4i8(
+  %a = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.mask.nxv4i8(
     <vscale x 4 x i8> %0,
     <vscale x 4 x i8>* %1,
     <vscale x 4 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 4 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 4 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 4 x i8> %a
+  ret <vscale x 4 x i8> %b
 }
 
-declare <vscale x 8 x i8> @llvm.riscv.vleff.nxv8i8(
+declare { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8(
   <vscale x 8 x i8>*,
   i64);
 
-define <vscale x 8 x i8> @intrinsic_vleff_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0, i64 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vleff_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vleff.nxv8i8(
+  %a = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8(
     <vscale x 8 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 8 x i8> %a
+  %b = extractvalue { <vscale x 8 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 8 x i8> %b
 }
 
-declare <vscale x 8 x i8> @llvm.riscv.vleff.mask.nxv8i8(
+declare { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.mask.nxv8i8(
   <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i1>,
   i64);
 
-define <vscale x 8 x i8> @intrinsic_vleff_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8>* %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i8> @intrinsic_vleff_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8>* %1, <vscale x 8 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vleff.mask.nxv8i8(
+  %a = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.mask.nxv8i8(
     <vscale x 8 x i8> %0,
     <vscale x 8 x i8>* %1,
     <vscale x 8 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 8 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 8 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 8 x i8> %a
+  ret <vscale x 8 x i8> %b
 }
 
-declare <vscale x 16 x i8> @llvm.riscv.vleff.nxv16i8(
+declare { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8(
   <vscale x 16 x i8>*,
   i64);
 
-define <vscale x 16 x i8> @intrinsic_vleff_v_nxv16i8_nxv16i8(<vscale x 16 x i8>* %0, i64 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vleff_v_nxv16i8_nxv16i8(<vscale x 16 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vleff.nxv16i8(
+  %a = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8(
     <vscale x 16 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 16 x i8> %a
+  %b = extractvalue { <vscale x 16 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 16 x i8> %b
 }
 
-declare <vscale x 16 x i8> @llvm.riscv.vleff.mask.nxv16i8(
+declare { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.mask.nxv16i8(
   <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i1>,
   i64);
 
-define <vscale x 16 x i8> @intrinsic_vleff_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8>* %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x i8> @intrinsic_vleff_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8>* %1, <vscale x 16 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vleff.mask.nxv16i8(
+  %a = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.mask.nxv16i8(
     <vscale x 16 x i8> %0,
     <vscale x 16 x i8>* %1,
     <vscale x 16 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 16 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 16 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 16 x i8> %a
+  ret <vscale x 16 x i8> %b
 }
 
-declare <vscale x 32 x i8> @llvm.riscv.vleff.nxv32i8(
+declare { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8(
   <vscale x 32 x i8>*,
   i64);
 
-define <vscale x 32 x i8> @intrinsic_vleff_v_nxv32i8_nxv32i8(<vscale x 32 x i8>* %0, i64 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vleff_v_nxv32i8_nxv32i8(<vscale x 32 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vleff.nxv32i8(
+  %a = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8(
     <vscale x 32 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 32 x i8> %a
+  %b = extractvalue { <vscale x 32 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 32 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 32 x i8> %b
 }
 
-declare <vscale x 32 x i8> @llvm.riscv.vleff.mask.nxv32i8(
+declare { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.mask.nxv32i8(
   <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i1>,
   i64);
 
-define <vscale x 32 x i8> @intrinsic_vleff_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8>* %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+define <vscale x 32 x i8> @intrinsic_vleff_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8>* %1, <vscale x 32 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vleff.mask.nxv32i8(
+  %a = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.mask.nxv32i8(
     <vscale x 32 x i8> %0,
     <vscale x 32 x i8>* %1,
     <vscale x 32 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 32 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 32 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
 
-  ret <vscale x 32 x i8> %a
+  ret <vscale x 32 x i8> %b
 }
 
-declare <vscale x 64 x i8> @llvm.riscv.vleff.nxv64i8(
+declare { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8(
   <vscale x 64 x i8>*,
   i64);
 
-define <vscale x 64 x i8> @intrinsic_vleff_v_nxv64i8_nxv64i8(<vscale x 64 x i8>* %0, i64 %1) nounwind {
+define <vscale x 64 x i8> @intrinsic_vleff_v_nxv64i8_nxv64i8(<vscale x 64 x i8>* %0, i64 %1, i64* %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_v_nxv64i8_nxv64i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 64 x i8> @llvm.riscv.vleff.nxv64i8(
+  %a = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8(
     <vscale x 64 x i8>* %0,
     i64 %1)
-
-  ret <vscale x 64 x i8> %a
+  %b = extractvalue { <vscale x 64 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 64 x i8>, i64 } %a, 1
+  store i64 %c, i64* %2
+  ret <vscale x 64 x i8> %b
 }
 
-declare <vscale x 64 x i8> @llvm.riscv.vleff.mask.nxv64i8(
+declare { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.mask.nxv64i8(
   <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   <vscale x 64 x i1>,
   i64);
 
-define <vscale x 64 x i8> @intrinsic_vleff_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8>* %1, <vscale x 64 x i1> %2, i64 %3) nounwind {
+define <vscale x 64 x i8> @intrinsic_vleff_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8>* %1, <vscale x 64 x i1> %2, i64 %3, i64* %4) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_v_nxv64i8_nxv64i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m8,tu,mu
 ; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 64 x i8> @llvm.riscv.vleff.mask.nxv64i8(
+  %a = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.mask.nxv64i8(
     <vscale x 64 x i8> %0,
     <vscale x 64 x i8>* %1,
     <vscale x 64 x i1> %2,
     i64 %3)
+  %b = extractvalue { <vscale x 64 x i8>, i64 } %a, 0
+  %c = extractvalue { <vscale x 64 x i8>, i64 } %a, 1
+  store i64 %c, i64* %4
+
+  ret <vscale x 64 x i8> %b
+}
+
+; Test with the VL output unused
+define <vscale x 1 x double> @intrinsic_vleff_dead_vl(<vscale x 1 x double>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vleff_dead_vl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i64 %1)
+  %b = extractvalue { <vscale x 1 x double>, i64 } %a, 0
+  ret <vscale x 1 x double> %b
+}
+
+define <vscale x 1 x double> @intrinsic_vleff_mask_dead_vl(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_dead_vl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+  %b = extractvalue { <vscale x 1 x double>, i64 } %a, 0
+
+  ret <vscale x 1 x double> %b
+}
+
+; Test with the loaded value unused
+define void @intrinsic_vleff_dead_value(<vscale x 1 x double>* %0, i64 %1, i64* %2) nounwind {
+; CHECK-LABEL: intrinsic_vleff_dead_value:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v25, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i64 %1)
+  %b = extractvalue { <vscale x 1 x double>, i64 } %a, 1
+  store i64 %b, i64* %2
+  ret void
+}
+
+define void @intrinsic_vleff_mask_dead_value(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i64 %3, i64* %4) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_dead_value:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    sd a0, 0(a2)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+  %b = extractvalue { <vscale x 1 x double>, i64 } %a, 1
+  store i64 %b, i64* %4
+
+  ret void
+}
+
+; Test with both outputs dead. Make sure the vleff isn't deleted.
+define void @intrinsic_vleff_dead_all(<vscale x 1 x double>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vleff_dead_all:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64ff.v v25, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double>* %0,
+    i64 %1)
+  ret void
+}
+
+define void @intrinsic_vleff_mask_dead_all(<vscale x 1 x double> %0, <vscale x 1 x double>* %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vleff_mask_dead_all:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double>* %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
 
-  ret <vscale x 64 x i8> %a
+  ret void
 }


        


More information about the llvm-commits mailing list