[llvm-branch-commits] [llvm] 096b02e - [RISCV] Add intrinsics for vcompress instruction
ShihPo Hung via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 29 18:47:51 PST 2020
Author: ShihPo Hung
Date: 2020-12-29T18:38:15-08:00
New Revision: 096b02ebbff72c403379b28a40f14a8c48e640f8
URL: https://github.com/llvm/llvm-project/commit/096b02ebbff72c403379b28a40f14a8c48e640f8
DIFF: https://github.com/llvm/llvm-project/commit/096b02ebbff72c403379b28a40f14a8c48e640f8.diff
LOG: [RISCV] Add intrinsics for vcompress instruction
This patch defines vcompress intrinsics and lower to V instructions.
We work with @rogfer01 from BSC to come out this patch.
Authored-by: Roger Ferrer Ibanez <rofirrim at gmail.com>
Co-Authored-by: ShihPo Hung <shihpo.hung at sifive.com>
Differential revision: https://reviews.llvm.org/D93809
Added:
llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
Modified:
llvm/include/llvm/IR/IntrinsicsRISCV.td
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 8e3d6f2ed675..430687d796ba 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -189,6 +189,13 @@ let TargetPrefix = "riscv" in {
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, mask, vl)
+ class RISCVUnaryAAMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
// Input: (vector_in, vector_in, vl)
class RISCVBinaryAAANoMask
@@ -680,6 +687,8 @@ let TargetPrefix = "riscv" in {
defm vrgather : RISCVBinaryAAX;
+ def "int_riscv_vcompress_mask" : RISCVUnaryAAMask;
+
defm vaaddu : RISCVSaturatingBinaryAAX;
defm vaadd : RISCVSaturatingBinaryAAX;
defm vasubu : RISCVSaturatingBinaryAAX;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index b0e3b455339f..6a89f34f9047 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -669,6 +669,27 @@ class VPseudoUnaryMOutMask:
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+// Mask can be V0~V31
+class VPseudoUnaryAnyMask<VReg RetClass,
+ VReg Op1Class> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge,
+ Op1Class:$rs2,
+ VR:$vm, GPR:$vl, ixlenimm:$sew),
+ []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "@earlyclobber $rd, $rd = $merge";
+ let Uses = [VL, VTYPE];
+ let VLIndex = 4;
+ let SEWIndex = 5;
+ let MergeOpIndex = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoBinaryNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
@@ -905,6 +926,13 @@ multiclass VPseudoUnaryV_M {
}
}
+multiclass VPseudoUnaryV_V_AnyMask {
+ foreach m = MxList.m in {
+ let VLMul = m.value in
+ def _VM # "_" # m.MX # "_MASK" : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>;
+ }
+}
+
multiclass VPseudoBinary<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
@@ -1311,6 +1339,27 @@ class VPatMaskUnaryMask<string intrinsic_name,
(mti.Mask VR:$rs2),
(mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+class VPatUnaryAnyMask<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (mask_type VR:$rs2),
+ (XLenVT GPR:$vl))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (mask_type VR:$rs2),
+ (NoX0 GPR:$vl), sew)>;
+
class VPatBinaryNoMask<string intrinsic_name,
string inst,
string kind,
@@ -1541,6 +1590,16 @@ multiclass VPatUnaryS_M<string intrinsic_name,
}
}
+multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ def : VPatUnaryAnyMask<intrinsic, instruction, "VM",
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass,
+ vti.RegClass>;
+ }
+}
+
multiclass VPatUnaryM_M<string intrinsic,
string inst>
{
@@ -2645,6 +2704,11 @@ let Predicates = [HasStdExtV, HasStdExtF] in {
//===----------------------------------------------------------------------===//
defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">;
+//===----------------------------------------------------------------------===//
+// 17.5. Vector Compress Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask;
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -3201,5 +3265,16 @@ let Predicates = [HasStdExtV, HasStdExtF] in {
AllFloatVectors, uimm5>;
} // Predicates = [HasStdExtV, HasStdExtF]
+//===----------------------------------------------------------------------===//
+// 17.5. Vector Compress Instruction
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+ defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+ defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
// Include the non-intrinsic ISel patterns
include "RISCVInstrInfoVSDPatterns.td"
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
new file mode 100644
index 000000000000..b8d42eeb9e6c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
@@ -0,0 +1,650 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+ <vscale x 1 x i8>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x i8> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+ <vscale x 2 x i8>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x i8> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+ <vscale x 64 x i8>,
+ <vscale x 64 x i8>,
+ <vscale x 64 x i1>,
+ i32);
+
+define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e8,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+ <vscale x 64 x i8> %0,
+ <vscale x 64 x i8> %1,
+ <vscale x 64 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 64 x i8> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+ <vscale x 2 x i16> %0,
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+ <vscale x 4 x i16>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+ <vscale x 4 x i16> %0,
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+ <vscale x 8 x i16>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+ <vscale x 8 x i16> %0,
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+ <vscale x 16 x i16>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+ <vscale x 16 x i16> %0,
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+ <vscale x 32 x i16>,
+ <vscale x 32 x i16>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+ <vscale x 32 x i16> %0,
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+ <vscale x 1 x i32> %0,
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i32>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+ <vscale x 2 x i32> %0,
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i32>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+ <vscale x 4 x i32> %0,
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i32>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+ <vscale x 8 x i32> %0,
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i32>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+ <vscale x 16 x i32> %0,
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
new file mode 100644
index 000000000000..a2e9df6e3fa2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
@@ -0,0 +1,830 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+ <vscale x 1 x i8>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x i8> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+ <vscale x 2 x i8>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x i8> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+ <vscale x 64 x i8>,
+ <vscale x 64 x i8>,
+ <vscale x 64 x i1>,
+ i64);
+
+define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e8,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
+ <vscale x 64 x i8> %0,
+ <vscale x 64 x i8> %1,
+ <vscale x 64 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 64 x i8> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
+ <vscale x 2 x i16> %0,
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+ <vscale x 4 x i16>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
+ <vscale x 4 x i16> %0,
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+ <vscale x 8 x i16>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
+ <vscale x 8 x i16> %0,
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+ <vscale x 16 x i16>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
+ <vscale x 16 x i16> %0,
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+ <vscale x 32 x i16>,
+ <vscale x 32 x i16>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
+ <vscale x 32 x i16> %0,
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
+ <vscale x 1 x i32> %0,
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i32>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
+ <vscale x 2 x i32> %0,
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i32>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
+ <vscale x 4 x i32> %0,
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i32>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
+ <vscale x 8 x i32> %0,
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i32>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
+ <vscale x 16 x i32> %0,
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i64>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i64>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64(
+ <vscale x 2 x i64> %0,
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i64>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64(
+ <vscale x 4 x i64> %0,
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i64>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64(
+ <vscale x 8 x i64> %0,
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64(
+ <vscale x 1 x double>,
+ <vscale x 1 x double>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x double> @intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v17, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64(
+ <vscale x 1 x double> %0,
+ <vscale x 1 x double> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64(
+ <vscale x 2 x double>,
+ <vscale x 2 x double>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x double> @intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v18, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64(
+ <vscale x 2 x double> %0,
+ <vscale x 2 x double> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64(
+ <vscale x 4 x double>,
+ <vscale x 4 x double>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x double> @intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v20, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64(
+ <vscale x 4 x double> %0,
+ <vscale x 4 x double> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64(
+ <vscale x 8 x double>,
+ <vscale x 8 x double>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x double> @intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vcompress.vm v16, v8, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64(
+ <vscale x 8 x double> %0,
+ <vscale x 8 x double> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x double> %a
+}
More information about the llvm-branch-commits
mailing list