[llvm-branch-commits] [llvm] 096b02e - [RISCV] Add intrinsics for vcompress instruction

Tue Dec 29 18:47:51 PST 2020

Author: ShihPo Hung
Date: 2020-12-29T18:38:15-08:00
New Revision: 096b02ebbff72c403379b28a40f14a8c48e640f8

URL: https://github.com/llvm/llvm-project/commit/096b02ebbff72c403379b28a40f14a8c48e640f8
DIFF: https://github.com/llvm/llvm-project/commit/096b02ebbff72c403379b28a40f14a8c48e640f8.diff

LOG: [RISCV] Add intrinsics for vcompress instruction

This patch defines vcompress intrinsics and lower to V instructions.

We work with @rogfer01 from BSC to come out this patch.

Authored-by: Roger Ferrer Ibanez <rofirrim at gmail.com>
Co-Authored-by: ShihPo Hung <shihpo.hung at sifive.com>

Differential revision: https://reviews.llvm.org/D93809

Added: 
    llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsRISCV.td
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 8e3d6f2ed675..430687d796ba 100644

--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -189,6 +189,13 @@ let TargetPrefix = "riscv" in {
                      LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
                     [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+  // For destination vector type is the same as first source vector (with mask).
+  // Input: (maskedoff, vector_in, mask, vl)
+  class RISCVUnaryAAMask
+        : Intrinsic<[llvm_anyvector_ty],
+                    [LLVMMatchType<0>, LLVMMatchType<0>,
+                     LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+                    [IntrNoMem]>, RISCVVIntrinsic;
   // For destination vector type is the same as first and second source vector.
   // Input: (vector_in, vector_in, vl)
   class RISCVBinaryAAANoMask
@@ -680,6 +687,8 @@ let TargetPrefix = "riscv" in {
 
   defm vrgather : RISCVBinaryAAX;
 
+  def "int_riscv_vcompress_mask" : RISCVUnaryAAMask;
+
   defm vaaddu : RISCVSaturatingBinaryAAX;
   defm vaadd : RISCVSaturatingBinaryAAX;
   defm vasubu : RISCVSaturatingBinaryAAX;

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index b0e3b455339f..6a89f34f9047 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -669,6 +669,27 @@ class VPseudoUnaryMOutMask:
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
+// Mask can be V0~V31
+class VPseudoUnaryAnyMask<VReg RetClass,
+                          VReg Op1Class> :
+      Pseudo<(outs RetClass:$rd),
+             (ins RetClass:$merge,
+                  Op1Class:$rs2,
+                  VR:$vm, GPR:$vl, ixlenimm:$sew),
+             []>,
+      RISCVVPseudo {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let usesCustomInserter = 1;
+  let Constraints = "@earlyclobber $rd, $rd = $merge";
+  let Uses = [VL, VTYPE];
+  let VLIndex = 4;
+  let SEWIndex = 5;
+  let MergeOpIndex = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
 class VPseudoBinaryNoMask<VReg RetClass,
                           VReg Op1Class,
                           DAGOperand Op2Class,
@@ -905,6 +926,13 @@ multiclass VPseudoUnaryV_M {
   }
 }
 
+multiclass VPseudoUnaryV_V_AnyMask {
+  foreach m = MxList.m in {
+    let VLMul = m.value in
+      def _VM # "_" # m.MX # "_MASK" : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>;
+  }
+}
+
 multiclass VPseudoBinary<VReg RetClass,
                          VReg Op1Class,
                          DAGOperand Op2Class,
@@ -1311,6 +1339,27 @@ class VPatMaskUnaryMask<string intrinsic_name,
                 (mti.Mask VR:$rs2),
                 (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
 
+class VPatUnaryAnyMask<string intrinsic,
+                       string inst,
+                       string kind,
+                       ValueType result_type,
+                       ValueType op1_type,
+                       ValueType mask_type,
+                       int sew,
+                       LMULInfo vlmul,
+                       VReg result_reg_class,
+                       VReg op1_reg_class> :
+  Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+                   (result_type result_reg_class:$merge),
+                   (op1_type op1_reg_class:$rs1),
+                   (mask_type VR:$rs2),
+                   (XLenVT GPR:$vl))),
+                   (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+                   (result_type result_reg_class:$merge),
+                   (op1_type op1_reg_class:$rs1),
+                   (mask_type VR:$rs2),
+                   (NoX0 GPR:$vl), sew)>;
+
 class VPatBinaryNoMask<string intrinsic_name,
                        string inst,
                        string kind,
@@ -1541,6 +1590,16 @@ multiclass VPatUnaryS_M<string intrinsic_name,
   }
 }
 
+multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction,
+                                list<VTypeInfo> vtilist> {
+  foreach vti = vtilist in {
+    def : VPatUnaryAnyMask<intrinsic, instruction, "VM",
+                           vti.Vector, vti.Vector, vti.Mask,
+                           vti.SEW, vti.LMul, vti.RegClass,
+                           vti.RegClass>;
+  }
+}
+
 multiclass VPatUnaryM_M<string intrinsic,
                          string inst>
 {
@@ -2645,6 +2704,11 @@ let Predicates = [HasStdExtV, HasStdExtF] in {
 //===----------------------------------------------------------------------===//
 defm PseudoVRGATHER    : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">;
 
+//===----------------------------------------------------------------------===//
+// 17.5. Vector Compress Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask;
+
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -3201,5 +3265,16 @@ let Predicates = [HasStdExtV, HasStdExtF] in {
                                      AllFloatVectors, uimm5>;
 } // Predicates = [HasStdExtV, HasStdExtF]
 
+//===----------------------------------------------------------------------===//
+// 17.5. Vector Compress Instruction
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+  defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+  defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
 // Include the non-intrinsic ISel patterns
 include "RISCVInstrInfoVSDPatterns.td"

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
new file mode 100644
index 000000000000..b8d42eeb9e6c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
@@ -0,0 +1,650 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+    <vscale x 2 x i8> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+    <vscale x 4 x i8> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+    <vscale x 8 x i8> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+  <vscale x 16 x i8>,
+  <vscale x 16 x i8>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+    <vscale x 16 x i8> %0,
+    <vscale x 16 x i8> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+  <vscale x 32 x i8>,
+  <vscale x 32 x i8>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+    <vscale x 32 x i8> %0,
+    <vscale x 32 x i8> %1,
+    <vscale x 32 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+  <vscale x 64 x i8>,
+  <vscale x 64 x i8>,
+  <vscale x 64 x i1>,
+  i32);
+
+define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e8,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+    <vscale x 64 x i8> %0,
+    <vscale x 64 x i8> %1,
+    <vscale x 64 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 64 x i8> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+  <vscale x 1 x i16>,
+  <vscale x 1 x i16>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+    <vscale x 1 x i16> %0,
+    <vscale x 1 x i16> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+  <vscale x 2 x i16>,
+  <vscale x 2 x i16>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+    <vscale x 2 x i16> %0,
+    <vscale x 2 x i16> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+  <vscale x 4 x i16>,
+  <vscale x 4 x i16>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+    <vscale x 4 x i16> %0,
+    <vscale x 4 x i16> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+  <vscale x 8 x i16>,
+  <vscale x 8 x i16>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+    <vscale x 8 x i16> %0,
+    <vscale x 8 x i16> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+  <vscale x 16 x i16>,
+  <vscale x 16 x i16>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+    <vscale x 16 x i16> %0,
+    <vscale x 16 x i16> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+  <vscale x 32 x i16>,
+  <vscale x 32 x i16>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+    <vscale x 32 x i16> %0,
+    <vscale x 32 x i16> %1,
+    <vscale x 32 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+  <vscale x 1 x i32>,
+  <vscale x 1 x i32>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+    <vscale x 1 x i32> %0,
+    <vscale x 1 x i32> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+  <vscale x 2 x i32>,
+  <vscale x 2 x i32>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+    <vscale x 2 x i32> %0,
+    <vscale x 2 x i32> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+  <vscale x 4 x i32>,
+  <vscale x 4 x i32>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+    <vscale x 4 x i32> %0,
+    <vscale x 4 x i32> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+  <vscale x 8 x i32>,
+  <vscale x 8 x i32>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+    <vscale x 8 x i32> %0,
+    <vscale x 8 x i32> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+  <vscale x 16 x i32>,
+  <vscale x 16 x i32>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+    <vscale x 16 x i32> %0,
+    <vscale x 16 x i32> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 32 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x float> %a
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
new file mode 100644
index 000000000000..a2e9df6e3fa2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
@@ -0,0 +1,830 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+    <vscale x 2 x i8> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+    <vscale x 4 x i8> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+    <vscale x 8 x i8> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+  <vscale x 16 x i8>,
+  <vscale x 16 x i8>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+    <vscale x 16 x i8> %0,
+    <vscale x 16 x i8> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+  <vscale x 32 x i8>,
+  <vscale x 32 x i8>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e8,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+    <vscale x 32 x i8> %0,
+    <vscale x 32 x i8> %1,
+    <vscale x 32 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+  <vscale x 64 x i8>,
+  <vscale x 64 x i8>,
+  <vscale x 64 x i1>,
+  i64);
+
+define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e8,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+    <vscale x 64 x i8> %0,
+    <vscale x 64 x i8> %1,
+    <vscale x 64 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 64 x i8> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+  <vscale x 1 x i16>,
+  <vscale x 1 x i16>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+    <vscale x 1 x i16> %0,
+    <vscale x 1 x i16> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+  <vscale x 2 x i16>,
+  <vscale x 2 x i16>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+    <vscale x 2 x i16> %0,
+    <vscale x 2 x i16> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+  <vscale x 4 x i16>,
+  <vscale x 4 x i16>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+    <vscale x 4 x i16> %0,
+    <vscale x 4 x i16> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+  <vscale x 8 x i16>,
+  <vscale x 8 x i16>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+    <vscale x 8 x i16> %0,
+    <vscale x 8 x i16> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+  <vscale x 16 x i16>,
+  <vscale x 16 x i16>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+    <vscale x 16 x i16> %0,
+    <vscale x 16 x i16> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+  <vscale x 32 x i16>,
+  <vscale x 32 x i16>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+    <vscale x 32 x i16> %0,
+    <vscale x 32 x i16> %1,
+    <vscale x 32 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+  <vscale x 1 x i32>,
+  <vscale x 1 x i32>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+    <vscale x 1 x i32> %0,
+    <vscale x 1 x i32> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+  <vscale x 2 x i32>,
+  <vscale x 2 x i32>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+    <vscale x 2 x i32> %0,
+    <vscale x 2 x i32> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+  <vscale x 4 x i32>,
+  <vscale x 4 x i32>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+    <vscale x 4 x i32> %0,
+    <vscale x 4 x i32> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+  <vscale x 8 x i32>,
+  <vscale x 8 x i32>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+    <vscale x 8 x i32> %0,
+    <vscale x 8 x i32> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+  <vscale x 16 x i32>,
+  <vscale x 16 x i32>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+    <vscale x 16 x i32> %0,
+    <vscale x 16 x i32> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64(
+  <vscale x 1 x i64>,
+  <vscale x 1 x i64>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i64> @intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64(
+    <vscale x 1 x i64> %0,
+    <vscale x 1 x i64> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64(
+  <vscale x 2 x i64>,
+  <vscale x 2 x i64>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i64> @intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64(
+    <vscale x 2 x i64> %0,
+    <vscale x 2 x i64> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64(
+  <vscale x 4 x i64>,
+  <vscale x 4 x i64>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i64> @intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64(
+    <vscale x 4 x i64> %0,
+    <vscale x 4 x i64> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64(
+  <vscale x 8 x i64>,
+  <vscale x 8 x i64>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x i64> @intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x i64> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 32 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v17, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x double> @intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v18, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64(
+    <vscale x 2 x double> %0,
+    <vscale x 2 x double> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x double> @intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v20, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64(
+    <vscale x 4 x double> %0,
+    <vscale x 4 x double> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x double> @intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64(
+    <vscale x 8 x double> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x double> %a
+}