[llvm] aa68e28 - [RISCV] Support `llvm.masked.compressstore` intrinsic (#83457)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 13 12:18:55 PDT 2024
Author: Kolya Panchenko
Date: 2024-03-13T15:18:51-04:00
New Revision: aa68e2814d9a4bad21e4def900152b2e78e25e98
URL: https://github.com/llvm/llvm-project/commit/aa68e2814d9a4bad21e4def900152b2e78e25e98
DIFF: https://github.com/llvm/llvm-project/commit/aa68e2814d9a4bad21e4def900152b2e78e25e98.diff
LOG: [RISCV] Support `llvm.masked.compressstore` intrinsic (#83457)
The changeset enables lowering of `llvm.masked.compressstore(%data,
%ptr, %mask)` for fixed vector types on RVV into:
```
%0 = vcompress %data, %mask, %vl
%new_vl = vcpop %mask, %vl
vse %0, %ptr, %new_vl
```
Such lowering is only possible when `%data` fits into the available
LMULs; otherwise `llvm.masked.compressstore` is scalarized by the
`ScalarizeMaskedMemIntrin` pass.
Even though the RVV spec provides an alternative sequence for
compressstore in section `15.8`, `vcompress + vcpop` should be the
proper canonical form to lower `llvm.masked.compressstore`. If a RISC-V
target finds the sequence from section `15.8` better, a peephole
optimization can transform `vcompress + vcpop` into that sequence.
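As a concrete instance of the pattern above, the `v4i32` case from the
new `compressstore.ll` test (included in the diff below) lowers on RV64
to a single `vcompress`/`vcpop`/`vse32` sequence:
```
; LLVM IR: fixed-length case that fits in one register group
define void @test_compresstore_v4i32(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
entry:
  tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %data, ptr align 4 %p, <4 x i1> %mask)
  ret void
}

; Generated RV64 code:
;   vsetivli zero, 4, e32, m1, ta, ma   ; VL = 4 elements, SEW=32, LMUL=1
;   vcompress.vm v9, v8, v0             ; pack active elements of %data to the front
;   vcpop.m a1, v0                      ; count of active mask bits = new VL
;   vsetvli zero, a1, e32, m1, ta, ma
;   vse32.v v9, (a0)                    ; unmasked unit-stride store of the compressed prefix
;   ret
```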
Added:
llvm/test/CodeGen/RISCV/rvv/compressstore.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 08678a859ae2b6..803774fd16dbf0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10466,6 +10466,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SDValue BasePtr = MemSD->getBasePtr();
SDValue Val, Mask, VL;
+ bool IsCompressingStore = false;
if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
Val = VPStore->getValue();
Mask = VPStore->getMask();
@@ -10474,9 +10475,11 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
const auto *MStore = cast<MaskedStoreSDNode>(Op);
Val = MStore->getValue();
Mask = MStore->getMask();
+ IsCompressingStore = MStore->isCompressingStore();
}
- bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+ bool IsUnmasked =
+ ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
MVT VT = Val.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
@@ -10486,7 +10489,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VT);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
- if (!IsUnmasked) {
+ if (!IsUnmasked || IsCompressingStore) {
MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
@@ -10495,6 +10498,15 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (IsCompressingStore) {
+ Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+ DAG.getUNDEF(ContainerVT), Val, Mask, VL);
+ VL =
+ DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
+ getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ecd373649e2c79..8f46fdc2f7ca93 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1620,3 +1620,13 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
+
+bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
+ auto *VTy = dyn_cast<VectorType>(DataTy);
+ if (!VTy || VTy->isScalableTy())
+ return false;
+
+ if (!isLegalMaskedLoadStore(DataTy, Alignment))
+ return false;
+ return true;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index af36e9d5d5e886..8daf6845dc8bc9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -261,6 +261,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
}
+ bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment);
+
bool isVScaleKnownToBeAPowerOfTwo() const {
return TLI->isVScaleKnownToBeAPowerOfTwo();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
new file mode 100644
index 00000000000000..673008d9c0b3d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -0,0 +1,871 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+v,+d,+m,+zbb %s -o - | FileCheck %s --check-prefix=RV64
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d,+m,+zbb %s -o - | FileCheck %s --check-prefix=RV32
+
+; Compress + store for i8 type
+
+define void @test_compresstore_v1i8(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
+; RV64-LABEL: test_compresstore_v1i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v1i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i8(<1 x i8> %data, ptr align 1 %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v2i8(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
+; RV64-LABEL: test_compresstore_v2i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v2i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i8(<2 x i8> %data, ptr align 1 %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v4i8(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
+; RV64-LABEL: test_compresstore_v4i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v4i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i8(<4 x i8> %data, ptr align 1 %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v8i8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
+; RV64-LABEL: test_compresstore_v8i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v8i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i8(<8 x i8> %data, ptr align 1 %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v16i8(ptr %p, <16 x i1> %mask, <16 x i8> %data) {
+; RV64-LABEL: test_compresstore_v16i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v16i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr align 1 %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v32i8(ptr %p, <32 x i1> %mask, <32 x i8> %data) {
+; RV64-LABEL: test_compresstore_v32i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v32i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr align 1 %p, <32 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v64i8(ptr %p, <64 x i1> %mask, <64 x i8> %data) {
+; RV64-LABEL: test_compresstore_v64i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vse8.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v64i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vse8.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr align 1 %p, <64 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v128i8(ptr %p, <128 x i1> %mask, <128 x i8> %data) {
+; RV64-LABEL: test_compresstore_v128i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v128i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 128
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v128i8(<128 x i8> %data, ptr align 1 %p, <128 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
+; RV64-LABEL: test_compresstore_v256i8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vmv1r.v v7, v8
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vle8.v v24, (a1)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v0, 1
+; RV64-NEXT: vmv.x.s a1, v9
+; RV64-NEXT: vmv.x.s a3, v0
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vcompress.vm v8, v16, v0
+; RV64-NEXT: vcpop.m a4, v0
+; RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v8, (a0)
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vcompress.vm v8, v24, v7
+; RV64-NEXT: vcpop.m a2, v7
+; RV64-NEXT: cpop a3, a3
+; RV64-NEXT: cpop a1, a1
+; RV64-NEXT: add a0, a0, a3
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v8, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v256i8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vmv1r.v v7, v8
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vle8.v v24, (a1)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v0, 1
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v10, v9, a1
+; RV32-NEXT: vmv.x.s a3, v10
+; RV32-NEXT: vsrl.vx v10, v0, a1
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: vmv.x.s a4, v9
+; RV32-NEXT: vmv.x.s a5, v0
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vcompress.vm v8, v16, v0
+; RV32-NEXT: vcpop.m a6, v0
+; RV32-NEXT: vsetvli zero, a6, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: cpop a1, a1
+; RV32-NEXT: cpop a5, a5
+; RV32-NEXT: add a1, a5, a1
+; RV32-NEXT: cpop a3, a3
+; RV32-NEXT: cpop a4, a4
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vcompress.vm v8, v24, v7
+; RV32-NEXT: vcpop.m a1, v7
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v256i8(<256 x i8> %data, ptr align 1 %p, <256 x i1> %mask)
+ ret void
+}
+
+; Compress + store for i16 type
+
+define void @test_compresstore_v1i16(ptr %p, <1 x i1> %mask, <1 x i16> %data) {
+; RV64-LABEL: test_compresstore_v1i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v1i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i16(<1 x i16> %data, ptr align 2 %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v2i16(ptr %p, <2 x i1> %mask, <2 x i16> %data) {
+; RV64-LABEL: test_compresstore_v2i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v2i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i16(<2 x i16> %data, ptr align 2 %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v4i16(ptr %p, <4 x i1> %mask, <4 x i16> %data) {
+; RV64-LABEL: test_compresstore_v4i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v4i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i16(<4 x i16> %data, ptr align 2 %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v8i16(ptr %p, <8 x i1> %mask, <8 x i16> %data) {
+; RV64-LABEL: test_compresstore_v8i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v8i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr align 2 %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v16i16(ptr %p, <16 x i1> %mask, <16 x i16> %data) {
+; RV64-LABEL: test_compresstore_v16i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT: vse16.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v16i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vse16.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr align 2 %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v32i16(ptr %p, <32 x i1> %mask, <32 x i16> %data) {
+; RV64-LABEL: test_compresstore_v32i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV64-NEXT: vse16.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v32i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV32-NEXT: vse16.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr align 2 %p, <32 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v64i16(ptr %p, <64 x i1> %mask, <64 x i16> %data) {
+; RV64-LABEL: test_compresstore_v64i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v64i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v64i16(<64 x i16> %data, ptr align 2 %p, <64 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v128i16(ptr %p, <128 x i1> %mask, <128 x i16> %data) {
+; RV64-LABEL: test_compresstore_v128i16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vcompress.vm v24, v8, v0
+; RV64-NEXT: vcpop.m a2, v0
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
+; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v0, 8
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vcompress.vm v24, v16, v8
+; RV64-NEXT: vcpop.m a2, v8
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a1, v0
+; RV64-NEXT: cpop a1, a1
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v128i16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vcompress.vm v24, v8, v0
+; RV32-NEXT: vcpop.m a2, v0
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
+; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v0, 8
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vcompress.vm v8, v16, v24
+; RV32-NEXT: vcpop.m a1, v24
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsrl.vx v16, v0, a2
+; RV32-NEXT: vmv.x.s a2, v16
+; RV32-NEXT: cpop a2, a2
+; RV32-NEXT: vmv.x.s a3, v0
+; RV32-NEXT: cpop a3, a3
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v128i16(<128 x i16> %data, ptr align 2 %p, <128 x i1> %mask)
+ ret void
+}
+
+; Compress + store for i32 type
+
+define void @test_compresstore_v1i32(ptr %p, <1 x i1> %mask, <1 x i32> %data) {
+; RV64-LABEL: test_compresstore_v1i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v1i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i32(<1 x i32> %data, ptr align 4 %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v2i32(ptr %p, <2 x i1> %mask, <2 x i32> %data) {
+; RV64-LABEL: test_compresstore_v2i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v2i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i32(<2 x i32> %data, ptr align 4 %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v4i32(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
+; RV64-LABEL: test_compresstore_v4i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v4i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %data, ptr align 4 %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v8i32(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
+; RV64-LABEL: test_compresstore_v8i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT: vse32.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v8i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT: vse32.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %data, ptr align 4 %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v16i32(ptr %p, <16 x i1> %mask, <16 x i32> %data) {
+; RV64-LABEL: test_compresstore_v16i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vse32.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v16i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT: vse32.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %data, ptr align 4 %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v32i32(ptr %p, <32 x i1> %mask, <32 x i32> %data) {
+; RV64-LABEL: test_compresstore_v32i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v32i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i32(<32 x i32> %data, ptr align 4 %p, <32 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v64i32(ptr %p, <64 x i1> %mask, <64 x i32> %data) {
+; RV64-LABEL: test_compresstore_v64i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vcompress.vm v24, v8, v0
+; RV64-NEXT: vcpop.m a2, v0
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v24, (a0)
+; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v0, 4
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vcompress.vm v24, v16, v8
+; RV64-NEXT: vcpop.m a1, v8
+; RV64-NEXT: vmv.x.s a2, v0
+; RV64-NEXT: cpopw a2, a2
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a0, a0, a2
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v24, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v64i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vcompress.vm v24, v8, v0
+; RV32-NEXT: vcpop.m a2, v0
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v24, (a0)
+; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v0, 4
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vcompress.vm v24, v16, v8
+; RV32-NEXT: vcpop.m a1, v8
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: cpop a2, a2
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v24, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v64i32(<64 x i32> %data, ptr align 4 %p, <64 x i1> %mask)
+ ret void
+}
+
+; Compress + store for i64 type
+
+define void @test_compresstore_v1i64(ptr %p, <1 x i1> %mask, <1 x i64> %data) {
+; RV64-LABEL: test_compresstore_v1i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v1i64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i64(<1 x i64> %data, ptr align 8 %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
+; RV64-LABEL: test_compresstore_v2i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v2i64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %data, ptr align 8 %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v4i64(ptr %p, <4 x i1> %mask, <4 x i64> %data) {
+; RV64-LABEL: test_compresstore_v4i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vse64.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v4i64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV32-NEXT: vse64.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %data, ptr align 8 %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v8i64(ptr %p, <8 x i1> %mask, <8 x i64> %data) {
+; RV64-LABEL: test_compresstore_v8i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vse64.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v8i64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV32-NEXT: vse64.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %data, ptr align 8 %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v16i64(ptr %p, <16 x i1> %mask, <16 x i64> %data) {
+; RV64-LABEL: test_compresstore_v16i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v16i64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vse64.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i64(<16 x i64> %data, ptr align 8 %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data) {
+; RV64-LABEL: test_compresstore_v32i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vcompress.vm v24, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v24, (a0)
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vcompress.vm v8, v16, v24
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT: vmv.x.s a1, v0
+; RV64-NEXT: zext.h a1, a1
+; RV64-NEXT: cpopw a1, a1
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vcpop.m a1, v24
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_v32i64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vcompress.vm v24, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vse64.v v24, (a0)
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v0, 2
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vcompress.vm v8, v16, v24
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: zext.h a1, a1
+; RV32-NEXT: cpop a1, a1
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: vcpop.m a1, v24
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask)
+ ret void
+}
+
+declare void @llvm.masked.compressstore.v1i8(<1 x i8>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i8(<2 x i8>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i8(<4 x i8>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v128i8(<128 x i8>, ptr, <128 x i1>)
+declare void @llvm.masked.compressstore.v256i8(<256 x i8>, ptr, <256 x i1>)
+
+declare void @llvm.masked.compressstore.v1i16(<1 x i16>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i16(<2 x i16>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i16(<64 x i16>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v128i16(<128 x i16>, ptr, <128 x i1>)
+
+declare void @llvm.masked.compressstore.v1i32(<1 x i32>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i32(<32 x i32>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i32(<64 x i32>, ptr, <64 x i1>)
+
+declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i64(<16 x i64>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i64(<32 x i64>, ptr, <32 x i1>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
index 52c52921e7e1d2..36fbdd8e0664fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
@@ -6,24 +6,20 @@ declare void @llvm.masked.compressstore.v1f16(<1 x half>, ptr, <1 x i1>)
define void @compressstore_v1f16(ptr %base, <1 x half> %v, <1 x i1> %mask) {
; RV32-LABEL: compressstore_v1f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB0_2
-; RV32-NEXT: # %bb.1: # %cond.store
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: .LBB0_2: # %else
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v1f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB0_2
-; RV64-NEXT: # %bb.1: # %cond.store
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: .LBB0_2: # %else
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v1f16(<1 x half> %v, ptr align 2 %base, <1 x i1> %mask)
ret void
@@ -33,48 +29,20 @@ declare void @llvm.masked.compressstore.v2f16(<2 x half>, ptr, <2 x i1>)
define void @compressstore_v2f16(ptr %base, <2 x half> %v, <2 x i1> %mask) {
; RV32-LABEL: compressstore_v2f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB1_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB1_4
-; RV32-NEXT: .LBB1_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB1_3: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB1_2
-; RV32-NEXT: .LBB1_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vse16.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v2f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB1_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB1_4
-; RV64-NEXT: .LBB1_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB1_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB1_2
-; RV64-NEXT: .LBB1_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse16.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v2f16(<2 x half> %v, ptr align 2 %base, <2 x i1> %mask)
ret void
@@ -84,88 +52,20 @@ declare void @llvm.masked.compressstore.v4f16(<4 x half>, ptr, <4 x i1>)
define void @compressstore_v4f16(ptr %base, <4 x half> %v, <4 x i1> %mask) {
; RV32-LABEL: compressstore_v4f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB2_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB2_6
-; RV32-NEXT: .LBB2_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB2_7
-; RV32-NEXT: .LBB2_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB2_8
-; RV32-NEXT: .LBB2_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB2_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB2_2
-; RV32-NEXT: .LBB2_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB2_3
-; RV32-NEXT: .LBB2_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB2_4
-; RV32-NEXT: .LBB2_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v4f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB2_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB2_6
-; RV64-NEXT: .LBB2_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB2_7
-; RV64-NEXT: .LBB2_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB2_8
-; RV64-NEXT: .LBB2_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB2_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB2_2
-; RV64-NEXT: .LBB2_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB2_3
-; RV64-NEXT: .LBB2_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB2_4
-; RV64-NEXT: .LBB2_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v4f16(<4 x half> %v, ptr align 2 %base, <4 x i1> %mask)
ret void
@@ -175,168 +75,20 @@ declare void @llvm.masked.compressstore.v8f16(<8 x half>, ptr, <8 x i1>)
define void @compressstore_v8f16(ptr %base, <8 x half> %v, <8 x i1> %mask) {
; RV32-LABEL: compressstore_v8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB3_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB3_10
-; RV32-NEXT: .LBB3_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB3_11
-; RV32-NEXT: .LBB3_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB3_12
-; RV32-NEXT: .LBB3_4: # %else8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB3_13
-; RV32-NEXT: .LBB3_5: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB3_14
-; RV32-NEXT: .LBB3_6: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB3_15
-; RV32-NEXT: .LBB3_7: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB3_16
-; RV32-NEXT: .LBB3_8: # %else20
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB3_9: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB3_2
-; RV32-NEXT: .LBB3_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB3_3
-; RV32-NEXT: .LBB3_11: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB3_4
-; RV32-NEXT: .LBB3_12: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: beqz a2, .LBB3_5
-; RV32-NEXT: .LBB3_13: # %cond.store10
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB3_6
-; RV32-NEXT: .LBB3_14: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB3_7
-; RV32-NEXT: .LBB3_15: # %cond.store16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB3_8
-; RV32-NEXT: .LBB3_16: # %cond.store19
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v8f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB3_9
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB3_10
-; RV64-NEXT: .LBB3_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB3_11
-; RV64-NEXT: .LBB3_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB3_12
-; RV64-NEXT: .LBB3_4: # %else8
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB3_13
-; RV64-NEXT: .LBB3_5: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB3_14
-; RV64-NEXT: .LBB3_6: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB3_15
-; RV64-NEXT: .LBB3_7: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB3_16
-; RV64-NEXT: .LBB3_8: # %else20
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB3_9: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB3_2
-; RV64-NEXT: .LBB3_10: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB3_3
-; RV64-NEXT: .LBB3_11: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB3_4
-; RV64-NEXT: .LBB3_12: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 3
+; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: beqz a2, .LBB3_5
-; RV64-NEXT: .LBB3_13: # %cond.store10
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 4
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB3_6
-; RV64-NEXT: .LBB3_14: # %cond.store13
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 5
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB3_7
-; RV64-NEXT: .LBB3_15: # %cond.store16
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 6
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB3_8
-; RV64-NEXT: .LBB3_16: # %cond.store19
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 7
-; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v8f16(<8 x half> %v, ptr align 2 %base, <8 x i1> %mask)
ret void
@@ -346,24 +98,20 @@ declare void @llvm.masked.compressstore.v1f32(<1 x float>, ptr, <1 x i1>)
define void @compressstore_v1f32(ptr %base, <1 x float> %v, <1 x i1> %mask) {
; RV32-LABEL: compressstore_v1f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB4_2
-; RV32-NEXT: # %bb.1: # %cond.store
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: .LBB4_2: # %else
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v1f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB4_2
-; RV64-NEXT: # %bb.1: # %cond.store
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: .LBB4_2: # %else
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v1f32(<1 x float> %v, ptr align 4 %base, <1 x i1> %mask)
ret void
@@ -373,48 +121,20 @@ declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>)
define void @compressstore_v2f32(ptr %base, <2 x float> %v, <2 x i1> %mask) {
; RV32-LABEL: compressstore_v2f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB5_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB5_4
-; RV32-NEXT: .LBB5_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB5_3: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB5_2
-; RV32-NEXT: .LBB5_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v2f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB5_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB5_4
-; RV64-NEXT: .LBB5_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB5_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB5_2
-; RV64-NEXT: .LBB5_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v2f32(<2 x float> %v, ptr align 4 %base, <2 x i1> %mask)
ret void
@@ -424,88 +144,20 @@ declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>)
define void @compressstore_v4f32(ptr %base, <4 x float> %v, <4 x i1> %mask) {
; RV32-LABEL: compressstore_v4f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB6_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB6_6
-; RV32-NEXT: .LBB6_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB6_7
-; RV32-NEXT: .LBB6_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB6_8
-; RV32-NEXT: .LBB6_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB6_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB6_2
-; RV32-NEXT: .LBB6_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vse32.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB6_3
-; RV32-NEXT: .LBB6_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vse32.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB6_4
-; RV32-NEXT: .LBB6_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v4f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB6_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB6_6
-; RV64-NEXT: .LBB6_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB6_7
-; RV64-NEXT: .LBB6_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB6_8
-; RV64-NEXT: .LBB6_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB6_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB6_2
-; RV64-NEXT: .LBB6_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vse32.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB6_3
-; RV64-NEXT: .LBB6_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB6_4
-; RV64-NEXT: .LBB6_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v4f32(<4 x float> %v, ptr align 4 %base, <4 x i1> %mask)
ret void
@@ -515,176 +167,20 @@ declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>)
define void @compressstore_v8f32(ptr %base, <8 x float> %v, <8 x i1> %mask) {
; RV32-LABEL: compressstore_v8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB7_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB7_10
-; RV32-NEXT: .LBB7_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB7_11
-; RV32-NEXT: .LBB7_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB7_12
-; RV32-NEXT: .LBB7_4: # %else8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB7_13
-; RV32-NEXT: .LBB7_5: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB7_14
-; RV32-NEXT: .LBB7_6: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB7_15
-; RV32-NEXT: .LBB7_7: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB7_16
-; RV32-NEXT: .LBB7_8: # %else20
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB7_9: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB7_2
-; RV32-NEXT: .LBB7_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB7_3
-; RV32-NEXT: .LBB7_11: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB7_4
-; RV32-NEXT: .LBB7_12: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 3
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: beqz a2, .LBB7_5
-; RV32-NEXT: .LBB7_13: # %cond.store10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB7_6
-; RV32-NEXT: .LBB7_14: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 5
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB7_7
-; RV32-NEXT: .LBB7_15: # %cond.store16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 6
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB7_8
-; RV32-NEXT: .LBB7_16: # %cond.store19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v8f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB7_9
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB7_10
-; RV64-NEXT: .LBB7_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB7_11
-; RV64-NEXT: .LBB7_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB7_12
-; RV64-NEXT: .LBB7_4: # %else8
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB7_13
-; RV64-NEXT: .LBB7_5: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB7_14
-; RV64-NEXT: .LBB7_6: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB7_15
-; RV64-NEXT: .LBB7_7: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB7_16
-; RV64-NEXT: .LBB7_8: # %else20
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB7_9: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB7_2
-; RV64-NEXT: .LBB7_10: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB7_3
-; RV64-NEXT: .LBB7_11: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB7_4
-; RV64-NEXT: .LBB7_12: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: beqz a2, .LBB7_5
-; RV64-NEXT: .LBB7_13: # %cond.store10
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB7_6
-; RV64-NEXT: .LBB7_14: # %cond.store13
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 5
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB7_7
-; RV64-NEXT: .LBB7_15: # %cond.store16
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 6
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB7_8
-; RV64-NEXT: .LBB7_16: # %cond.store19
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 7
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v8f32(<8 x float> %v, ptr align 4 %base, <8 x i1> %mask)
ret void
@@ -694,24 +190,20 @@ declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>)
define void @compressstore_v1f64(ptr %base, <1 x double> %v, <1 x i1> %mask) {
; RV32-LABEL: compressstore_v1f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB8_2
-; RV32-NEXT: # %bb.1: # %cond.store
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: .LBB8_2: # %else
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v1f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB8_2
-; RV64-NEXT: # %bb.1: # %cond.store
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: .LBB8_2: # %else
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v1f64(<1 x double> %v, ptr align 8 %base, <1 x i1> %mask)
ret void
@@ -721,48 +213,20 @@ declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>)
define void @compressstore_v2f64(ptr %base, <2 x double> %v, <2 x i1> %mask) {
; RV32-LABEL: compressstore_v2f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB9_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB9_4
-; RV32-NEXT: .LBB9_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB9_3: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB9_2
-; RV32-NEXT: .LBB9_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v2f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB9_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB9_4
-; RV64-NEXT: .LBB9_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB9_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB9_2
-; RV64-NEXT: .LBB9_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v2f64(<2 x double> %v, ptr align 8 %base, <2 x i1> %mask)
ret void
@@ -772,92 +236,20 @@ declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>)
define void @compressstore_v4f64(ptr %base, <4 x double> %v, <4 x i1> %mask) {
; RV32-LABEL: compressstore_v4f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB10_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB10_6
-; RV32-NEXT: .LBB10_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB10_7
-; RV32-NEXT: .LBB10_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB10_8
-; RV32-NEXT: .LBB10_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB10_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB10_2
-; RV32-NEXT: .LBB10_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV32-NEXT: vse64.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB10_3
-; RV32-NEXT: .LBB10_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB10_4
-; RV32-NEXT: .LBB10_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB10_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB10_6
-; RV64-NEXT: .LBB10_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB10_7
-; RV64-NEXT: .LBB10_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB10_8
-; RV64-NEXT: .LBB10_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB10_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB10_2
-; RV64-NEXT: .LBB10_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB10_3
-; RV64-NEXT: .LBB10_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB10_4
-; RV64-NEXT: .LBB10_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v4f64(<4 x double> %v, ptr align 8 %base, <4 x i1> %mask)
ret void
@@ -867,213 +259,21 @@ declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>)
define void @compressstore_v8f64(ptr %base, <8 x double> %v, <8 x i1> %mask) {
; RV32-LABEL: compressstore_v8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB11_11
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB11_12
-; RV32-NEXT: .LBB11_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB11_13
-; RV32-NEXT: .LBB11_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB11_5
-; RV32-NEXT: .LBB11_4: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v12, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: .LBB11_5: # %else8
-; RV32-NEXT: addi sp, sp, -320
-; RV32-NEXT: .cfi_def_cfa_offset 320
-; RV32-NEXT: sw ra, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 320
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB11_14
-; RV32-NEXT: # %bb.6: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB11_15
-; RV32-NEXT: .LBB11_7: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB11_16
-; RV32-NEXT: .LBB11_8: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB11_10
-; RV32-NEXT: .LBB11_9: # %cond.store19
-; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a1)
-; RV32-NEXT: fld fa5, 56(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: .LBB11_10: # %else20
-; RV32-NEXT: addi sp, s0, -320
-; RV32-NEXT: lw ra, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 320
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB11_11: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB11_2
-; RV32-NEXT: .LBB11_12: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vse64.v v12, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB11_3
-; RV32-NEXT: .LBB11_13: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v12, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB11_4
-; RV32-NEXT: j .LBB11_5
-; RV32-NEXT: .LBB11_14: # %cond.store10
-; RV32-NEXT: addi a2, sp, 192
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: fld fa5, 224(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB11_7
-; RV32-NEXT: .LBB11_15: # %cond.store13
-; RV32-NEXT: addi a2, sp, 128
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: fld fa5, 168(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB11_8
-; RV32-NEXT: .LBB11_16: # %cond.store16
-; RV32-NEXT: addi a2, sp, 64
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: fld fa5, 112(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB11_9
-; RV32-NEXT: j .LBB11_10
+; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v8f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB11_11
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB11_12
-; RV64-NEXT: .LBB11_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB11_13
-; RV64-NEXT: .LBB11_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB11_5
-; RV64-NEXT: .LBB11_4: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: .LBB11_5: # %else8
-; RV64-NEXT: addi sp, sp, -320
-; RV64-NEXT: .cfi_def_cfa_offset 320
-; RV64-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 320
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB11_14
-; RV64-NEXT: # %bb.6: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB11_15
-; RV64-NEXT: .LBB11_7: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB11_16
-; RV64-NEXT: .LBB11_8: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB11_10
-; RV64-NEXT: .LBB11_9: # %cond.store19
-; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a1)
-; RV64-NEXT: fld fa5, 56(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: .LBB11_10: # %else20
-; RV64-NEXT: addi sp, s0, -320
-; RV64-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 320
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB11_11: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB11_2
-; RV64-NEXT: .LBB11_12: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB11_3
-; RV64-NEXT: .LBB11_13: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB11_4
-; RV64-NEXT: j .LBB11_5
-; RV64-NEXT: .LBB11_14: # %cond.store10
-; RV64-NEXT: addi a2, sp, 192
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: fld fa5, 224(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB11_7
-; RV64-NEXT: .LBB11_15: # %cond.store13
-; RV64-NEXT: addi a2, sp, 128
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: fld fa5, 168(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB11_8
-; RV64-NEXT: .LBB11_16: # %cond.store16
-; RV64-NEXT: addi a2, sp, 64
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: fld fa5, 112(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB11_9
-; RV64-NEXT: j .LBB11_10
+; RV64-NEXT: ret
call void @llvm.masked.compressstore.v8f64(<8 x double> %v, ptr align 8 %base, <8 x i1> %mask)
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
index eb0096dbfba6de..a388ba92f302bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
@@ -6,13 +6,11 @@ declare void @llvm.masked.compressstore.v1i8(<1 x i8>, ptr, <1 x i1>)
define void @compressstore_v1i8(ptr %base, <1 x i8> %v, <1 x i1> %mask) {
; CHECK-LABEL: compressstore_v1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: bnez a1, .LBB0_2
-; CHECK-NEXT: # %bb.1: # %cond.store
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: .LBB0_2: # %else
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i8(<1 x i8> %v, ptr %base, <1 x i1> %mask)
ret void
@@ -22,25 +20,11 @@ declare void @llvm.masked.compressstore.v2i8(<2 x i8>, ptr, <2 x i1>)
define void @compressstore_v2i8(ptr %base, <2 x i8> %v, <2 x i1> %mask) {
; CHECK-LABEL: compressstore_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB1_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB1_4
-; CHECK-NEXT: .LBB1_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_3: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB1_2
-; CHECK-NEXT: .LBB1_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i8(<2 x i8> %v, ptr %base, <2 x i1> %mask)
ret void
@@ -50,45 +34,11 @@ declare void @llvm.masked.compressstore.v4i8(<4 x i8>, ptr, <4 x i1>)
define void @compressstore_v4i8(ptr %base, <4 x i8> %v, <4 x i1> %mask) {
; CHECK-LABEL: compressstore_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB2_5
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB2_6
-; CHECK-NEXT: .LBB2_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB2_7
-; CHECK-NEXT: .LBB2_3: # %else5
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: bnez a1, .LBB2_8
-; CHECK-NEXT: .LBB2_4: # %else8
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_5: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB2_2
-; CHECK-NEXT: .LBB2_6: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB2_3
-; CHECK-NEXT: .LBB2_7: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: beqz a1, .LBB2_4
-; CHECK-NEXT: .LBB2_8: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i8(<4 x i8> %v, ptr %base, <4 x i1> %mask)
ret void
@@ -98,85 +48,11 @@ declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
define void @compressstore_v8i8(ptr %base, <8 x i8> %v, <8 x i1> %mask) {
; CHECK-LABEL: compressstore_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB3_9
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB3_10
-; CHECK-NEXT: .LBB3_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB3_11
-; CHECK-NEXT: .LBB3_3: # %else5
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: bnez a2, .LBB3_12
-; CHECK-NEXT: .LBB3_4: # %else8
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: bnez a2, .LBB3_13
-; CHECK-NEXT: .LBB3_5: # %else11
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: bnez a2, .LBB3_14
-; CHECK-NEXT: .LBB3_6: # %else14
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: bnez a2, .LBB3_15
-; CHECK-NEXT: .LBB3_7: # %else17
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: bnez a1, .LBB3_16
-; CHECK-NEXT: .LBB3_8: # %else20
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_9: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB3_2
-; CHECK-NEXT: .LBB3_10: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB3_3
-; CHECK-NEXT: .LBB3_11: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: beqz a2, .LBB3_4
-; CHECK-NEXT: .LBB3_12: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 3
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: beqz a2, .LBB3_5
-; CHECK-NEXT: .LBB3_13: # %cond.store10
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: beqz a2, .LBB3_6
-; CHECK-NEXT: .LBB3_14: # %cond.store13
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 5
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: beqz a2, .LBB3_7
-; CHECK-NEXT: .LBB3_15: # %cond.store16
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 6
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: beqz a1, .LBB3_8
-; CHECK-NEXT: .LBB3_16: # %cond.store19
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i8(<8 x i8> %v, ptr %base, <8 x i1> %mask)
ret void
@@ -186,13 +62,11 @@ declare void @llvm.masked.compressstore.v1i16(<1 x i16>, ptr, <1 x i1>)
define void @compressstore_v1i16(ptr %base, <1 x i16> %v, <1 x i1> %mask) {
; CHECK-LABEL: compressstore_v1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: bnez a1, .LBB4_2
-; CHECK-NEXT: # %bb.1: # %cond.store
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: .LBB4_2: # %else
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i16(<1 x i16> %v, ptr align 2 %base, <1 x i1> %mask)
ret void
@@ -202,25 +76,11 @@ declare void @llvm.masked.compressstore.v2i16(<2 x i16>, ptr, <2 x i1>)
define void @compressstore_v2i16(ptr %base, <2 x i16> %v, <2 x i1> %mask) {
; CHECK-LABEL: compressstore_v2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB5_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB5_4
-; CHECK-NEXT: .LBB5_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_3: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB5_2
-; CHECK-NEXT: .LBB5_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i16(<2 x i16> %v, ptr align 2 %base, <2 x i1> %mask)
ret void
@@ -230,45 +90,11 @@ declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
define void @compressstore_v4i16(ptr %base, <4 x i16> %v, <4 x i1> %mask) {
; CHECK-LABEL: compressstore_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB6_5
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB6_6
-; CHECK-NEXT: .LBB6_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB6_7
-; CHECK-NEXT: .LBB6_3: # %else5
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: bnez a1, .LBB6_8
-; CHECK-NEXT: .LBB6_4: # %else8
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB6_5: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB6_2
-; CHECK-NEXT: .LBB6_6: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB6_3
-; CHECK-NEXT: .LBB6_7: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: beqz a1, .LBB6_4
-; CHECK-NEXT: .LBB6_8: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i16(<4 x i16> %v, ptr align 2 %base, <4 x i1> %mask)
ret void
@@ -278,85 +104,11 @@ declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
define void @compressstore_v8i16(ptr %base, <8 x i16> %v, <8 x i1> %mask) {
; CHECK-LABEL: compressstore_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB7_9
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB7_10
-; CHECK-NEXT: .LBB7_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB7_11
-; CHECK-NEXT: .LBB7_3: # %else5
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: bnez a2, .LBB7_12
-; CHECK-NEXT: .LBB7_4: # %else8
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: bnez a2, .LBB7_13
-; CHECK-NEXT: .LBB7_5: # %else11
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: bnez a2, .LBB7_14
-; CHECK-NEXT: .LBB7_6: # %else14
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: bnez a2, .LBB7_15
-; CHECK-NEXT: .LBB7_7: # %else17
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: bnez a1, .LBB7_16
-; CHECK-NEXT: .LBB7_8: # %else20
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB7_9: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB7_2
-; CHECK-NEXT: .LBB7_10: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB7_3
-; CHECK-NEXT: .LBB7_11: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: beqz a2, .LBB7_4
-; CHECK-NEXT: .LBB7_12: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 3
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: beqz a2, .LBB7_5
-; CHECK-NEXT: .LBB7_13: # %cond.store10
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: beqz a2, .LBB7_6
-; CHECK-NEXT: .LBB7_14: # %cond.store13
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 5
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: beqz a2, .LBB7_7
-; CHECK-NEXT: .LBB7_15: # %cond.store16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 6
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: beqz a1, .LBB7_8
-; CHECK-NEXT: .LBB7_16: # %cond.store19
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i16(<8 x i16> %v, ptr align 2 %base, <8 x i1> %mask)
ret void
@@ -366,13 +118,11 @@ declare void @llvm.masked.compressstore.v1i32(<1 x i32>, ptr, <1 x i1>)
define void @compressstore_v1i32(ptr %base, <1 x i32> %v, <1 x i1> %mask) {
; CHECK-LABEL: compressstore_v1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: bnez a1, .LBB8_2
-; CHECK-NEXT: # %bb.1: # %cond.store
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: .LBB8_2: # %else
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i32(<1 x i32> %v, ptr align 4 %base, <1 x i1> %mask)
ret void
@@ -382,25 +132,11 @@ declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
define void @compressstore_v2i32(ptr %base, <2 x i32> %v, <2 x i1> %mask) {
; CHECK-LABEL: compressstore_v2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB9_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB9_4
-; CHECK-NEXT: .LBB9_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB9_3: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB9_2
-; CHECK-NEXT: .LBB9_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i32(<2 x i32> %v, ptr align 4 %base, <2 x i1> %mask)
ret void
@@ -410,45 +146,11 @@ declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
define void @compressstore_v4i32(ptr %base, <4 x i32> %v, <4 x i1> %mask) {
; CHECK-LABEL: compressstore_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB10_5
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB10_6
-; CHECK-NEXT: .LBB10_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB10_7
-; CHECK-NEXT: .LBB10_3: # %else5
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: bnez a1, .LBB10_8
-; CHECK-NEXT: .LBB10_4: # %else8
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB10_5: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB10_2
-; CHECK-NEXT: .LBB10_6: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB10_3
-; CHECK-NEXT: .LBB10_7: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: beqz a1, .LBB10_4
-; CHECK-NEXT: .LBB10_8: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i32(<4 x i32> %v, ptr align 4 %base, <4 x i1> %mask)
ret void
@@ -458,89 +160,11 @@ declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
define void @compressstore_v8i32(ptr %base, <8 x i32> %v, <8 x i1> %mask) {
; CHECK-LABEL: compressstore_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB11_9
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB11_10
-; CHECK-NEXT: .LBB11_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB11_11
-; CHECK-NEXT: .LBB11_3: # %else5
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: bnez a2, .LBB11_12
-; CHECK-NEXT: .LBB11_4: # %else8
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: bnez a2, .LBB11_13
-; CHECK-NEXT: .LBB11_5: # %else11
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: bnez a2, .LBB11_14
-; CHECK-NEXT: .LBB11_6: # %else14
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: bnez a2, .LBB11_15
-; CHECK-NEXT: .LBB11_7: # %else17
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: bnez a1, .LBB11_16
-; CHECK-NEXT: .LBB11_8: # %else20
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_9: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB11_2
-; CHECK-NEXT: .LBB11_10: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 1
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB11_3
-; CHECK-NEXT: .LBB11_11: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: beqz a2, .LBB11_4
-; CHECK-NEXT: .LBB11_12: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 3
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: beqz a2, .LBB11_5
-; CHECK-NEXT: .LBB11_13: # %cond.store10
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: beqz a2, .LBB11_6
-; CHECK-NEXT: .LBB11_14: # %cond.store13
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 5
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: beqz a2, .LBB11_7
-; CHECK-NEXT: .LBB11_15: # %cond.store16
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 6
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: beqz a1, .LBB11_8
-; CHECK-NEXT: .LBB11_16: # %cond.store19
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i32(<8 x i32> %v, ptr align 4 %base, <8 x i1> %mask)
ret void
@@ -548,439 +172,59 @@ define void @compressstore_v8i32(ptr %base, <8 x i32> %v, <8 x i1> %mask) {
declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
define void @compressstore_v1i64(ptr %base, <1 x i64> %v, <1 x i1> %mask) {
-; RV32-LABEL: compressstore_v1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB12_2
-; RV32-NEXT: # %bb.1: # %cond.store
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: .LBB12_2: # %else
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB12_2
-; RV64-NEXT: # %bb.1: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: .LBB12_2: # %else
-; RV64-NEXT: ret
+; CHECK-LABEL: compressstore_v1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v9, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i64(<1 x i64> %v, ptr align 8 %base, <1 x i1> %mask)
ret void
}
declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
define void @compressstore_v2i64(ptr %base, <2 x i64> %v, <2 x i1> %mask) {
-; RV32-LABEL: compressstore_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB13_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB13_4
-; RV32-NEXT: .LBB13_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB13_3: # %cond.store
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v9, v8, a2
-; RV32-NEXT: vmv.x.s a2, v9
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB13_2
-; RV32-NEXT: .LBB13_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB13_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB13_4
-; RV64-NEXT: .LBB13_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB13_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB13_2
-; RV64-NEXT: .LBB13_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: compressstore_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v9, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i64(<2 x i64> %v, ptr align 8 %base, <2 x i1> %mask)
ret void
}
declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
define void @compressstore_v4i64(ptr %base, <4 x i64> %v, <4 x i1> %mask) {
-; RV32-LABEL: compressstore_v4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB14_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB14_6
-; RV32-NEXT: .LBB14_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB14_7
-; RV32-NEXT: .LBB14_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB14_8
-; RV32-NEXT: .LBB14_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB14_5: # %cond.store
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vx v10, v8, a2
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB14_2
-; RV32-NEXT: .LBB14_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v12, v10, a2
-; RV32-NEXT: vmv.x.s a2, v12
-; RV32-NEXT: vmv.x.s a3, v10
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB14_3
-; RV32-NEXT: .LBB14_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v12, v10, a2
-; RV32-NEXT: vmv.x.s a2, v12
-; RV32-NEXT: vmv.x.s a3, v10
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB14_4
-; RV32-NEXT: .LBB14_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB14_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB14_6
-; RV64-NEXT: .LBB14_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB14_7
-; RV64-NEXT: .LBB14_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB14_8
-; RV64-NEXT: .LBB14_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB14_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB14_2
-; RV64-NEXT: .LBB14_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB14_3
-; RV64-NEXT: .LBB14_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB14_4
-; RV64-NEXT: .LBB14_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: compressstore_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i64(<4 x i64> %v, ptr align 8 %base, <4 x i1> %mask)
ret void
}
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
define void @compressstore_v8i64(ptr %base, <8 x i64> %v, <8 x i1> %mask) {
-; RV32-LABEL: compressstore_v8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB15_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB15_10
-; RV32-NEXT: .LBB15_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB15_11
-; RV32-NEXT: .LBB15_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB15_12
-; RV32-NEXT: .LBB15_4: # %else8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB15_13
-; RV32-NEXT: .LBB15_5: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB15_14
-; RV32-NEXT: .LBB15_6: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB15_15
-; RV32-NEXT: .LBB15_7: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB15_16
-; RV32-NEXT: .LBB15_8: # %else20
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB15_9: # %cond.store
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vx v12, v8, a2
-; RV32-NEXT: vmv.x.s a2, v12
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB15_2
-; RV32-NEXT: .LBB15_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB15_3
-; RV32-NEXT: .LBB15_11: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB15_4
-; RV32-NEXT: .LBB15_12: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: beqz a2, .LBB15_5
-; RV32-NEXT: .LBB15_13: # %cond.store10
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 4
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB15_6
-; RV32-NEXT: .LBB15_14: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 5
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB15_7
-; RV32-NEXT: .LBB15_15: # %cond.store16
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 6
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB15_8
-; RV32-NEXT: .LBB15_16: # %cond.store19
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB15_11
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB15_12
-; RV64-NEXT: .LBB15_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB15_13
-; RV64-NEXT: .LBB15_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB15_5
-; RV64-NEXT: .LBB15_4: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: .LBB15_5: # %else8
-; RV64-NEXT: addi sp, sp, -320
-; RV64-NEXT: .cfi_def_cfa_offset 320
-; RV64-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 320
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB15_14
-; RV64-NEXT: # %bb.6: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB15_15
-; RV64-NEXT: .LBB15_7: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB15_16
-; RV64-NEXT: .LBB15_8: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB15_10
-; RV64-NEXT: .LBB15_9: # %cond.store19
-; RV64-NEXT: mv a1, sp
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a1)
-; RV64-NEXT: ld a1, 56(sp)
-; RV64-NEXT: sd a1, 0(a0)
-; RV64-NEXT: .LBB15_10: # %else20
-; RV64-NEXT: addi sp, s0, -320
-; RV64-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 320
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB15_11: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB15_2
-; RV64-NEXT: .LBB15_12: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 1
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB15_3
-; RV64-NEXT: .LBB15_13: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB15_4
-; RV64-NEXT: j .LBB15_5
-; RV64-NEXT: .LBB15_14: # %cond.store10
-; RV64-NEXT: addi a2, sp, 192
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: ld a2, 224(sp)
-; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB15_7
-; RV64-NEXT: .LBB15_15: # %cond.store13
-; RV64-NEXT: addi a2, sp, 128
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: ld a2, 168(sp)
-; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB15_8
-; RV64-NEXT: .LBB15_16: # %cond.store16
-; RV64-NEXT: addi a2, sp, 64
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: ld a2, 112(sp)
-; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB15_9
-; RV64-NEXT: j .LBB15_10
+; CHECK-LABEL: compressstore_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vcompress.vm v12, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v12, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i64(<8 x i64> %v, ptr align 8 %base, <8 x i1> %mask)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
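
For readers skimming the regenerated checks above: the same vcompress/vcpop/vse pattern can be reproduced in isolation with a small standalone IR file. The snippet below is an illustrative sketch, not part of the committed tests; the function name, vector type, and llc flags are my own choices. Compiling it with an llc built from this revision (something like `llc -mtriple=riscv64 -mattr=+v -o -`) should emit the single vcompress.vm / vcpop.m / vsetvli / vse32.v sequence that the updated CHECK lines expect for an m1-sized vector.
```
; reproducer.ll -- illustrative only, assumes llc from this revision
declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)

define void @reproducer(ptr %base, <4 x i32> %v, <4 x i1> %mask) {
  ; Expected lowering: vcompress.vm, vcpop.m, vsetvli with the popcount VL, vse32.v
  call void @llvm.masked.compressstore.v4i32(<4 x i32> %v, ptr align 4 %base, <4 x i1> %mask)
  ret void
}
```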