[llvm] [RISCV] Support `llvm.masked.compressstore` intrinsic (PR #83457)
Kolya Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 12 12:32:45 PDT 2024
https://github.com/nikolaypanchenko updated https://github.com/llvm/llvm-project/pull/83457
>From 7034dc801c8d92731fafdc4755701e217af8969e Mon Sep 17 00:00:00 2001
From: Kolya Panchenko <kolya.panchenko at sifive.com>
Date: Thu, 29 Feb 2024 10:15:33 -0800
Subject: [PATCH 1/5] [RISCV] Support `llvm.masked.compressstore` intrinsic
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 +-
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 7 +
.../Target/RISCV/RISCVTargetTransformInfo.h | 2 +
llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 17545 ++++++++++++++++
4 files changed, 17568 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/compressstore.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 08678a859ae2b6..803774fd16dbf0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10466,6 +10466,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SDValue BasePtr = MemSD->getBasePtr();
SDValue Val, Mask, VL;
+ bool IsCompressingStore = false;
if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
Val = VPStore->getValue();
Mask = VPStore->getMask();
@@ -10474,9 +10475,11 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
const auto *MStore = cast<MaskedStoreSDNode>(Op);
Val = MStore->getValue();
Mask = MStore->getMask();
+ IsCompressingStore = MStore->isCompressingStore();
}
- bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+ bool IsUnmasked =
+ ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
MVT VT = Val.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
@@ -10486,7 +10489,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VT);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
- if (!IsUnmasked) {
+ if (!IsUnmasked || IsCompressingStore) {
MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
@@ -10495,6 +10498,15 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (IsCompressingStore) {
+ Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+ DAG.getUNDEF(ContainerVT), Val, Mask, VL);
+ VL =
+ DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
+ getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ecd373649e2c79..60bada663957bb 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1620,3 +1620,10 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
+
+bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy) { // Legality query for llvm.masked.compressstore on DataTy.
+ auto *VTy = dyn_cast<VectorType>(DataTy);
+ if (!VTy || VTy->isScalableTy() || !ST->hasVInstructions()) // Reject non-vector types, scalable vectors, and subtargets without vector instructions.
+ return false;
+ return getRegUsageForType(VTy) <= 8; // Legal only if the type's register usage is at most 8. NOTE(review): 8 presumably is the largest RVV register group (LMUL=8) - confirm.
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index af36e9d5d5e886..6433027cce0e27 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -261,6 +261,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
}
+ bool isLegalMaskedCompressStore(Type *DataTy);
+
bool isVScaleKnownToBeAPowerOfTwo() const {
return TLI->isVScaleKnownToBeAPowerOfTwo();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
new file mode 100644
index 00000000000000..f227f39740a003
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -0,0 +1,17545 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+v,+d %s -o - | FileCheck %s --check-prefix=RV64
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d %s -o - | FileCheck %s --check-prefix=RV32
+
+; Compress + store for i8 type
+
+define void @test_compresstore_i8_v1(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v1:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v1:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i8(<1 x i8> %data, ptr %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v2(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v2:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v2:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i8(<2 x i8> %data, ptr %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v4(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v4:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v4:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i8(<4 x i8> %data, ptr %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i8(<8 x i8> %data, ptr %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v16(ptr %p, <16 x i1> %mask, <16 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v32(ptr %p, <32 x i1> %mask, <32 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %p, <32 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v64(ptr %p, <64 x i1> %mask, <64 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vse8.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vse8.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %p, <64 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v128(ptr %p, <128 x i1> %mask, <128 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v128:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v128:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 128
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v128i8(<128 x i8> %data, ptr %p, <128 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v256(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v256:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vle8.v v24, (a1)
+; RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a2, v0
+; RV64-NEXT: andi a1, a2, 1
+; RV64-NEXT: bnez a1, .LBB8_273
+; RV64-NEXT: # %bb.1: # %else
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: bnez a1, .LBB8_274
+; RV64-NEXT: .LBB8_2: # %else2
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: bnez a1, .LBB8_275
+; RV64-NEXT: .LBB8_3: # %else5
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: bnez a1, .LBB8_276
+; RV64-NEXT: .LBB8_4: # %else8
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: bnez a1, .LBB8_277
+; RV64-NEXT: .LBB8_5: # %else11
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: bnez a1, .LBB8_278
+; RV64-NEXT: .LBB8_6: # %else14
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: bnez a1, .LBB8_279
+; RV64-NEXT: .LBB8_7: # %else17
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: bnez a1, .LBB8_280
+; RV64-NEXT: .LBB8_8: # %else20
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: bnez a1, .LBB8_281
+; RV64-NEXT: .LBB8_9: # %else23
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: bnez a1, .LBB8_282
+; RV64-NEXT: .LBB8_10: # %else26
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: bnez a1, .LBB8_283
+; RV64-NEXT: .LBB8_11: # %else29
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bltz a1, .LBB8_284
+; RV64-NEXT: .LBB8_12: # %else32
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bltz a1, .LBB8_285
+; RV64-NEXT: .LBB8_13: # %else35
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bltz a1, .LBB8_286
+; RV64-NEXT: .LBB8_14: # %else38
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bltz a1, .LBB8_287
+; RV64-NEXT: .LBB8_15: # %else41
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bltz a1, .LBB8_288
+; RV64-NEXT: .LBB8_16: # %else44
+; RV64-NEXT: slli a1, a2, 47
+; RV64-NEXT: bltz a1, .LBB8_289
+; RV64-NEXT: .LBB8_17: # %else47
+; RV64-NEXT: slli a1, a2, 46
+; RV64-NEXT: bltz a1, .LBB8_290
+; RV64-NEXT: .LBB8_18: # %else50
+; RV64-NEXT: slli a1, a2, 45
+; RV64-NEXT: bltz a1, .LBB8_291
+; RV64-NEXT: .LBB8_19: # %else53
+; RV64-NEXT: slli a1, a2, 44
+; RV64-NEXT: bltz a1, .LBB8_292
+; RV64-NEXT: .LBB8_20: # %else56
+; RV64-NEXT: slli a1, a2, 43
+; RV64-NEXT: bltz a1, .LBB8_293
+; RV64-NEXT: .LBB8_21: # %else59
+; RV64-NEXT: slli a1, a2, 42
+; RV64-NEXT: bltz a1, .LBB8_294
+; RV64-NEXT: .LBB8_22: # %else62
+; RV64-NEXT: slli a1, a2, 41
+; RV64-NEXT: bgez a1, .LBB8_23
+; RV64-NEXT: j .LBB8_295
+; RV64-NEXT: .LBB8_23: # %else65
+; RV64-NEXT: slli a1, a2, 40
+; RV64-NEXT: bgez a1, .LBB8_24
+; RV64-NEXT: j .LBB8_296
+; RV64-NEXT: .LBB8_24: # %else68
+; RV64-NEXT: slli a1, a2, 39
+; RV64-NEXT: bgez a1, .LBB8_25
+; RV64-NEXT: j .LBB8_297
+; RV64-NEXT: .LBB8_25: # %else71
+; RV64-NEXT: slli a1, a2, 38
+; RV64-NEXT: bgez a1, .LBB8_26
+; RV64-NEXT: j .LBB8_298
+; RV64-NEXT: .LBB8_26: # %else74
+; RV64-NEXT: slli a1, a2, 37
+; RV64-NEXT: bgez a1, .LBB8_27
+; RV64-NEXT: j .LBB8_299
+; RV64-NEXT: .LBB8_27: # %else77
+; RV64-NEXT: slli a1, a2, 36
+; RV64-NEXT: bgez a1, .LBB8_28
+; RV64-NEXT: j .LBB8_300
+; RV64-NEXT: .LBB8_28: # %else80
+; RV64-NEXT: slli a1, a2, 35
+; RV64-NEXT: bgez a1, .LBB8_29
+; RV64-NEXT: j .LBB8_301
+; RV64-NEXT: .LBB8_29: # %else83
+; RV64-NEXT: slli a1, a2, 34
+; RV64-NEXT: bgez a1, .LBB8_30
+; RV64-NEXT: j .LBB8_302
+; RV64-NEXT: .LBB8_30: # %else86
+; RV64-NEXT: slli a1, a2, 33
+; RV64-NEXT: bgez a1, .LBB8_31
+; RV64-NEXT: j .LBB8_303
+; RV64-NEXT: .LBB8_31: # %else89
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: bgez a1, .LBB8_33
+; RV64-NEXT: .LBB8_32: # %cond.store91
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 31
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: .LBB8_33: # %else92
+; RV64-NEXT: addi sp, sp, -2032
+; RV64-NEXT: .cfi_def_cfa_offset 2032
+; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 2032
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: lui a1, 6
+; RV64-NEXT: addiw a1, a1, -1776
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: slli a3, a2, 31
+; RV64-NEXT: lui a1, 6
+; RV64-NEXT: addiw a1, a1, -984
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bgez a3, .LBB8_34
+; RV64-NEXT: j .LBB8_304
+; RV64-NEXT: .LBB8_34: # %else95
+; RV64-NEXT: slli a3, a2, 30
+; RV64-NEXT: bgez a3, .LBB8_35
+; RV64-NEXT: j .LBB8_305
+; RV64-NEXT: .LBB8_35: # %else98
+; RV64-NEXT: slli a3, a2, 29
+; RV64-NEXT: bgez a3, .LBB8_36
+; RV64-NEXT: j .LBB8_306
+; RV64-NEXT: .LBB8_36: # %else101
+; RV64-NEXT: slli a3, a2, 28
+; RV64-NEXT: bgez a3, .LBB8_37
+; RV64-NEXT: j .LBB8_307
+; RV64-NEXT: .LBB8_37: # %else104
+; RV64-NEXT: slli a3, a2, 27
+; RV64-NEXT: bgez a3, .LBB8_38
+; RV64-NEXT: j .LBB8_308
+; RV64-NEXT: .LBB8_38: # %else107
+; RV64-NEXT: slli a3, a2, 26
+; RV64-NEXT: bgez a3, .LBB8_39
+; RV64-NEXT: j .LBB8_309
+; RV64-NEXT: .LBB8_39: # %else110
+; RV64-NEXT: slli a3, a2, 25
+; RV64-NEXT: bgez a3, .LBB8_40
+; RV64-NEXT: j .LBB8_310
+; RV64-NEXT: .LBB8_40: # %else113
+; RV64-NEXT: slli a3, a2, 24
+; RV64-NEXT: bgez a3, .LBB8_41
+; RV64-NEXT: j .LBB8_311
+; RV64-NEXT: .LBB8_41: # %else116
+; RV64-NEXT: slli a3, a2, 23
+; RV64-NEXT: bgez a3, .LBB8_43
+; RV64-NEXT: .LBB8_42: # %cond.store118
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: .LBB8_43: # %else119
+; RV64-NEXT: slli a3, a2, 22
+; RV64-NEXT: lui a1, 5
+; RV64-NEXT: addiw a1, a1, 953
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bgez a3, .LBB8_44
+; RV64-NEXT: j .LBB8_312
+; RV64-NEXT: .LBB8_44: # %else122
+; RV64-NEXT: slli a3, a2, 21
+; RV64-NEXT: bgez a3, .LBB8_45
+; RV64-NEXT: j .LBB8_313
+; RV64-NEXT: .LBB8_45: # %else125
+; RV64-NEXT: slli a3, a2, 20
+; RV64-NEXT: bgez a3, .LBB8_46
+; RV64-NEXT: j .LBB8_314
+; RV64-NEXT: .LBB8_46: # %else128
+; RV64-NEXT: slli a3, a2, 19
+; RV64-NEXT: bgez a3, .LBB8_47
+; RV64-NEXT: j .LBB8_315
+; RV64-NEXT: .LBB8_47: # %else131
+; RV64-NEXT: slli a3, a2, 18
+; RV64-NEXT: bgez a3, .LBB8_48
+; RV64-NEXT: j .LBB8_316
+; RV64-NEXT: .LBB8_48: # %else134
+; RV64-NEXT: slli a3, a2, 17
+; RV64-NEXT: bgez a3, .LBB8_49
+; RV64-NEXT: j .LBB8_317
+; RV64-NEXT: .LBB8_49: # %else137
+; RV64-NEXT: slli a3, a2, 16
+; RV64-NEXT: bgez a3, .LBB8_50
+; RV64-NEXT: j .LBB8_318
+; RV64-NEXT: .LBB8_50: # %else140
+; RV64-NEXT: slli a3, a2, 15
+; RV64-NEXT: bgez a3, .LBB8_51
+; RV64-NEXT: j .LBB8_319
+; RV64-NEXT: .LBB8_51: # %else143
+; RV64-NEXT: slli a3, a2, 14
+; RV64-NEXT: bgez a3, .LBB8_52
+; RV64-NEXT: j .LBB8_320
+; RV64-NEXT: .LBB8_52: # %else146
+; RV64-NEXT: slli a3, a2, 13
+; RV64-NEXT: bgez a3, .LBB8_53
+; RV64-NEXT: j .LBB8_321
+; RV64-NEXT: .LBB8_53: # %else149
+; RV64-NEXT: slli a3, a2, 12
+; RV64-NEXT: bgez a3, .LBB8_54
+; RV64-NEXT: j .LBB8_322
+; RV64-NEXT: .LBB8_54: # %else152
+; RV64-NEXT: slli a3, a2, 11
+; RV64-NEXT: bgez a3, .LBB8_55
+; RV64-NEXT: j .LBB8_323
+; RV64-NEXT: .LBB8_55: # %else155
+; RV64-NEXT: slli a3, a2, 10
+; RV64-NEXT: bgez a3, .LBB8_56
+; RV64-NEXT: j .LBB8_324
+; RV64-NEXT: .LBB8_56: # %else158
+; RV64-NEXT: slli a3, a2, 9
+; RV64-NEXT: bgez a3, .LBB8_57
+; RV64-NEXT: j .LBB8_325
+; RV64-NEXT: .LBB8_57: # %else161
+; RV64-NEXT: slli a3, a2, 8
+; RV64-NEXT: bgez a3, .LBB8_58
+; RV64-NEXT: j .LBB8_326
+; RV64-NEXT: .LBB8_58: # %else164
+; RV64-NEXT: slli a3, a2, 7
+; RV64-NEXT: bgez a3, .LBB8_59
+; RV64-NEXT: j .LBB8_327
+; RV64-NEXT: .LBB8_59: # %else167
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: bgez a3, .LBB8_61
+; RV64-NEXT: .LBB8_60: # %cond.store169
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_61: # %else170
+; RV64-NEXT: slli a1, a2, 5
+; RV64-NEXT: lui a3, 5
+; RV64-NEXT: addiw a3, a3, -1206
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: bgez a1, .LBB8_62
+; RV64-NEXT: j .LBB8_328
+; RV64-NEXT: .LBB8_62: # %else173
+; RV64-NEXT: slli a1, a2, 4
+; RV64-NEXT: bgez a1, .LBB8_63
+; RV64-NEXT: j .LBB8_329
+; RV64-NEXT: .LBB8_63: # %else176
+; RV64-NEXT: slli a1, a2, 3
+; RV64-NEXT: bgez a1, .LBB8_64
+; RV64-NEXT: j .LBB8_330
+; RV64-NEXT: .LBB8_64: # %else179
+; RV64-NEXT: slli a1, a2, 2
+; RV64-NEXT: bgez a1, .LBB8_66
+; RV64-NEXT: .LBB8_65: # %cond.store181
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 1651(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: .LBB8_66: # %else182
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: slli a1, a2, 1
+; RV64-NEXT: vslidedown.vi v9, v0, 1
+; RV64-NEXT: bgez a1, .LBB8_68
+; RV64-NEXT: # %bb.67: # %cond.store184
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 1524(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: .LBB8_68: # %else185
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a1, v9
+; RV64-NEXT: bgez a2, .LBB8_69
+; RV64-NEXT: j .LBB8_331
+; RV64-NEXT: .LBB8_69: # %else188
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: beqz a2, .LBB8_70
+; RV64-NEXT: j .LBB8_332
+; RV64-NEXT: .LBB8_70: # %else191
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: beqz a2, .LBB8_71
+; RV64-NEXT: j .LBB8_333
+; RV64-NEXT: .LBB8_71: # %else194
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: beqz a2, .LBB8_72
+; RV64-NEXT: j .LBB8_334
+; RV64-NEXT: .LBB8_72: # %else197
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: beqz a2, .LBB8_73
+; RV64-NEXT: j .LBB8_335
+; RV64-NEXT: .LBB8_73: # %else200
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: beqz a2, .LBB8_74
+; RV64-NEXT: j .LBB8_336
+; RV64-NEXT: .LBB8_74: # %else203
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: beqz a2, .LBB8_75
+; RV64-NEXT: j .LBB8_337
+; RV64-NEXT: .LBB8_75: # %else206
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: beqz a2, .LBB8_76
+; RV64-NEXT: j .LBB8_338
+; RV64-NEXT: .LBB8_76: # %else209
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: beqz a2, .LBB8_77
+; RV64-NEXT: j .LBB8_339
+; RV64-NEXT: .LBB8_77: # %else212
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: beqz a2, .LBB8_78
+; RV64-NEXT: j .LBB8_340
+; RV64-NEXT: .LBB8_78: # %else215
+; RV64-NEXT: andi a2, a1, 512
+; RV64-NEXT: beqz a2, .LBB8_79
+; RV64-NEXT: j .LBB8_341
+; RV64-NEXT: .LBB8_79: # %else218
+; RV64-NEXT: andi a2, a1, 1024
+; RV64-NEXT: beqz a2, .LBB8_81
+; RV64-NEXT: .LBB8_80: # %cond.store220
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 0(a3)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_81: # %else221
+; RV64-NEXT: slli a3, a1, 52
+; RV64-NEXT: lui a2, 4
+; RV64-NEXT: addiw a2, a2, 731
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: bgez a3, .LBB8_82
+; RV64-NEXT: j .LBB8_342
+; RV64-NEXT: .LBB8_82: # %else224
+; RV64-NEXT: slli a3, a1, 51
+; RV64-NEXT: bgez a3, .LBB8_83
+; RV64-NEXT: j .LBB8_343
+; RV64-NEXT: .LBB8_83: # %else227
+; RV64-NEXT: slli a3, a1, 50
+; RV64-NEXT: bgez a3, .LBB8_84
+; RV64-NEXT: j .LBB8_344
+; RV64-NEXT: .LBB8_84: # %else230
+; RV64-NEXT: slli a3, a1, 49
+; RV64-NEXT: bgez a3, .LBB8_85
+; RV64-NEXT: j .LBB8_345
+; RV64-NEXT: .LBB8_85: # %else233
+; RV64-NEXT: slli a3, a1, 48
+; RV64-NEXT: bgez a3, .LBB8_86
+; RV64-NEXT: j .LBB8_346
+; RV64-NEXT: .LBB8_86: # %else236
+; RV64-NEXT: slli a3, a1, 47
+; RV64-NEXT: bgez a3, .LBB8_87
+; RV64-NEXT: j .LBB8_347
+; RV64-NEXT: .LBB8_87: # %else239
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bgez a3, .LBB8_88
+; RV64-NEXT: j .LBB8_348
+; RV64-NEXT: .LBB8_88: # %else242
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bgez a3, .LBB8_89
+; RV64-NEXT: j .LBB8_349
+; RV64-NEXT: .LBB8_89: # %else245
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bgez a3, .LBB8_90
+; RV64-NEXT: j .LBB8_350
+; RV64-NEXT: .LBB8_90: # %else248
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bgez a3, .LBB8_91
+; RV64-NEXT: j .LBB8_351
+; RV64-NEXT: .LBB8_91: # %else251
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bgez a3, .LBB8_92
+; RV64-NEXT: j .LBB8_352
+; RV64-NEXT: .LBB8_92: # %else254
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bgez a3, .LBB8_93
+; RV64-NEXT: j .LBB8_353
+; RV64-NEXT: .LBB8_93: # %else257
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bgez a3, .LBB8_94
+; RV64-NEXT: j .LBB8_354
+; RV64-NEXT: .LBB8_94: # %else260
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bgez a3, .LBB8_95
+; RV64-NEXT: j .LBB8_355
+; RV64-NEXT: .LBB8_95: # %else263
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bgez a3, .LBB8_96
+; RV64-NEXT: j .LBB8_356
+; RV64-NEXT: .LBB8_96: # %else266
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: bgez a3, .LBB8_97
+; RV64-NEXT: j .LBB8_357
+; RV64-NEXT: .LBB8_97: # %else269
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bgez a3, .LBB8_99
+; RV64-NEXT: .LBB8_98: # %cond.store271
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_99: # %else272
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: lui a2, 4
+; RV64-NEXT: addiw a2, a2, -1428
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: bgez a3, .LBB8_100
+; RV64-NEXT: j .LBB8_358
+; RV64-NEXT: .LBB8_100: # %else275
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bgez a3, .LBB8_101
+; RV64-NEXT: j .LBB8_359
+; RV64-NEXT: .LBB8_101: # %else278
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bgez a3, .LBB8_102
+; RV64-NEXT: j .LBB8_360
+; RV64-NEXT: .LBB8_102: # %else281
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bgez a3, .LBB8_103
+; RV64-NEXT: j .LBB8_361
+; RV64-NEXT: .LBB8_103: # %else284
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bgez a3, .LBB8_104
+; RV64-NEXT: j .LBB8_362
+; RV64-NEXT: .LBB8_104: # %else287
+; RV64-NEXT: slli a3, a1, 30
+; RV64-NEXT: bgez a3, .LBB8_105
+; RV64-NEXT: j .LBB8_363
+; RV64-NEXT: .LBB8_105: # %else290
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bgez a3, .LBB8_106
+; RV64-NEXT: j .LBB8_364
+; RV64-NEXT: .LBB8_106: # %else293
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bgez a3, .LBB8_107
+; RV64-NEXT: j .LBB8_365
+; RV64-NEXT: .LBB8_107: # %else296
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bgez a3, .LBB8_108
+; RV64-NEXT: j .LBB8_366
+; RV64-NEXT: .LBB8_108: # %else299
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bgez a3, .LBB8_109
+; RV64-NEXT: j .LBB8_367
+; RV64-NEXT: .LBB8_109: # %else302
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bgez a3, .LBB8_110
+; RV64-NEXT: j .LBB8_368
+; RV64-NEXT: .LBB8_110: # %else305
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bgez a3, .LBB8_111
+; RV64-NEXT: j .LBB8_369
+; RV64-NEXT: .LBB8_111: # %else308
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bgez a3, .LBB8_112
+; RV64-NEXT: j .LBB8_370
+; RV64-NEXT: .LBB8_112: # %else311
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bgez a3, .LBB8_113
+; RV64-NEXT: j .LBB8_371
+; RV64-NEXT: .LBB8_113: # %else314
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bgez a3, .LBB8_114
+; RV64-NEXT: j .LBB8_372
+; RV64-NEXT: .LBB8_114: # %else317
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bgez a3, .LBB8_115
+; RV64-NEXT: j .LBB8_373
+; RV64-NEXT: .LBB8_115: # %else320
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bgez a3, .LBB8_117
+; RV64-NEXT: .LBB8_116: # %cond.store322
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 29
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_117: # %else323
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: lui a2, 3
+; RV64-NEXT: addiw a2, a2, 509
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: bgez a3, .LBB8_118
+; RV64-NEXT: j .LBB8_374
+; RV64-NEXT: .LBB8_118: # %else326
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bgez a3, .LBB8_119
+; RV64-NEXT: j .LBB8_375
+; RV64-NEXT: .LBB8_119: # %else329
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bgez a3, .LBB8_120
+; RV64-NEXT: j .LBB8_376
+; RV64-NEXT: .LBB8_120: # %else332
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bgez a3, .LBB8_121
+; RV64-NEXT: j .LBB8_377
+; RV64-NEXT: .LBB8_121: # %else335
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bgez a3, .LBB8_122
+; RV64-NEXT: j .LBB8_378
+; RV64-NEXT: .LBB8_122: # %else338
+; RV64-NEXT: slli a3, a1, 13
+; RV64-NEXT: bgez a3, .LBB8_123
+; RV64-NEXT: j .LBB8_379
+; RV64-NEXT: .LBB8_123: # %else341
+; RV64-NEXT: slli a3, a1, 12
+; RV64-NEXT: bgez a3, .LBB8_124
+; RV64-NEXT: j .LBB8_380
+; RV64-NEXT: .LBB8_124: # %else344
+; RV64-NEXT: slli a3, a1, 11
+; RV64-NEXT: bgez a3, .LBB8_125
+; RV64-NEXT: j .LBB8_381
+; RV64-NEXT: .LBB8_125: # %else347
+; RV64-NEXT: slli a3, a1, 10
+; RV64-NEXT: bgez a3, .LBB8_126
+; RV64-NEXT: j .LBB8_382
+; RV64-NEXT: .LBB8_126: # %else350
+; RV64-NEXT: slli a3, a1, 9
+; RV64-NEXT: bgez a3, .LBB8_127
+; RV64-NEXT: j .LBB8_383
+; RV64-NEXT: .LBB8_127: # %else353
+; RV64-NEXT: slli a3, a1, 8
+; RV64-NEXT: bgez a3, .LBB8_128
+; RV64-NEXT: j .LBB8_384
+; RV64-NEXT: .LBB8_128: # %else356
+; RV64-NEXT: slli a3, a1, 7
+; RV64-NEXT: bgez a3, .LBB8_129
+; RV64-NEXT: j .LBB8_385
+; RV64-NEXT: .LBB8_129: # %else359
+; RV64-NEXT: slli a3, a1, 6
+; RV64-NEXT: bgez a3, .LBB8_130
+; RV64-NEXT: j .LBB8_386
+; RV64-NEXT: .LBB8_130: # %else362
+; RV64-NEXT: slli a3, a1, 5
+; RV64-NEXT: bgez a3, .LBB8_131
+; RV64-NEXT: j .LBB8_387
+; RV64-NEXT: .LBB8_131: # %else365
+; RV64-NEXT: slli a3, a1, 4
+; RV64-NEXT: bgez a3, .LBB8_132
+; RV64-NEXT: j .LBB8_388
+; RV64-NEXT: .LBB8_132: # %else368
+; RV64-NEXT: slli a3, a1, 3
+; RV64-NEXT: bgez a3, .LBB8_133
+; RV64-NEXT: j .LBB8_389
+; RV64-NEXT: .LBB8_133: # %else371
+; RV64-NEXT: slli a3, a1, 2
+; RV64-NEXT: bgez a3, .LBB8_135
+; RV64-NEXT: .LBB8_134: # %cond.store373
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_135: # %else374
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: lui a3, 3
+; RV64-NEXT: addiw a3, a3, -1619
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: bgez a2, .LBB8_137
+; RV64-NEXT: # %bb.136: # %cond.store376
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 2001(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: .LBB8_137: # %else377
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: bgez a1, .LBB8_138
+; RV64-NEXT: j .LBB8_390
+; RV64-NEXT: .LBB8_138: # %else380
+; RV64-NEXT: andi a1, a2, 1
+; RV64-NEXT: beqz a1, .LBB8_139
+; RV64-NEXT: j .LBB8_391
+; RV64-NEXT: .LBB8_139: # %else383
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: beqz a1, .LBB8_140
+; RV64-NEXT: j .LBB8_392
+; RV64-NEXT: .LBB8_140: # %else386
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: beqz a1, .LBB8_141
+; RV64-NEXT: j .LBB8_393
+; RV64-NEXT: .LBB8_141: # %else389
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: beqz a1, .LBB8_142
+; RV64-NEXT: j .LBB8_394
+; RV64-NEXT: .LBB8_142: # %else392
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: beqz a1, .LBB8_143
+; RV64-NEXT: j .LBB8_395
+; RV64-NEXT: .LBB8_143: # %else395
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: beqz a1, .LBB8_144
+; RV64-NEXT: j .LBB8_396
+; RV64-NEXT: .LBB8_144: # %else398
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: beqz a1, .LBB8_145
+; RV64-NEXT: j .LBB8_397
+; RV64-NEXT: .LBB8_145: # %else401
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: beqz a1, .LBB8_146
+; RV64-NEXT: j .LBB8_398
+; RV64-NEXT: .LBB8_146: # %else404
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: beqz a1, .LBB8_147
+; RV64-NEXT: j .LBB8_399
+; RV64-NEXT: .LBB8_147: # %else407
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: beqz a1, .LBB8_148
+; RV64-NEXT: j .LBB8_400
+; RV64-NEXT: .LBB8_148: # %else410
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: beqz a1, .LBB8_149
+; RV64-NEXT: j .LBB8_401
+; RV64-NEXT: .LBB8_149: # %else413
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bgez a1, .LBB8_150
+; RV64-NEXT: j .LBB8_402
+; RV64-NEXT: .LBB8_150: # %else416
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bgez a1, .LBB8_151
+; RV64-NEXT: j .LBB8_403
+; RV64-NEXT: .LBB8_151: # %else419
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bgez a1, .LBB8_152
+; RV64-NEXT: j .LBB8_404
+; RV64-NEXT: .LBB8_152: # %else422
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bgez a1, .LBB8_153
+; RV64-NEXT: j .LBB8_405
+; RV64-NEXT: .LBB8_153: # %else425
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bgez a1, .LBB8_154
+; RV64-NEXT: j .LBB8_406
+; RV64-NEXT: .LBB8_154: # %else428
+; RV64-NEXT: slli a1, a2, 47
+; RV64-NEXT: bgez a1, .LBB8_155
+; RV64-NEXT: j .LBB8_407
+; RV64-NEXT: .LBB8_155: # %else431
+; RV64-NEXT: slli a1, a2, 46
+; RV64-NEXT: bgez a1, .LBB8_156
+; RV64-NEXT: j .LBB8_408
+; RV64-NEXT: .LBB8_156: # %else434
+; RV64-NEXT: slli a1, a2, 45
+; RV64-NEXT: bgez a1, .LBB8_157
+; RV64-NEXT: j .LBB8_409
+; RV64-NEXT: .LBB8_157: # %else437
+; RV64-NEXT: slli a1, a2, 44
+; RV64-NEXT: bgez a1, .LBB8_158
+; RV64-NEXT: j .LBB8_410
+; RV64-NEXT: .LBB8_158: # %else440
+; RV64-NEXT: slli a1, a2, 43
+; RV64-NEXT: bgez a1, .LBB8_159
+; RV64-NEXT: j .LBB8_411
+; RV64-NEXT: .LBB8_159: # %else443
+; RV64-NEXT: slli a1, a2, 42
+; RV64-NEXT: bgez a1, .LBB8_160
+; RV64-NEXT: j .LBB8_412
+; RV64-NEXT: .LBB8_160: # %else446
+; RV64-NEXT: slli a1, a2, 41
+; RV64-NEXT: bgez a1, .LBB8_161
+; RV64-NEXT: j .LBB8_413
+; RV64-NEXT: .LBB8_161: # %else449
+; RV64-NEXT: slli a1, a2, 40
+; RV64-NEXT: bgez a1, .LBB8_162
+; RV64-NEXT: j .LBB8_414
+; RV64-NEXT: .LBB8_162: # %else452
+; RV64-NEXT: slli a1, a2, 39
+; RV64-NEXT: bgez a1, .LBB8_163
+; RV64-NEXT: j .LBB8_415
+; RV64-NEXT: .LBB8_163: # %else455
+; RV64-NEXT: slli a1, a2, 38
+; RV64-NEXT: bgez a1, .LBB8_164
+; RV64-NEXT: j .LBB8_416
+; RV64-NEXT: .LBB8_164: # %else458
+; RV64-NEXT: slli a1, a2, 37
+; RV64-NEXT: bgez a1, .LBB8_165
+; RV64-NEXT: j .LBB8_417
+; RV64-NEXT: .LBB8_165: # %else461
+; RV64-NEXT: slli a1, a2, 36
+; RV64-NEXT: bgez a1, .LBB8_166
+; RV64-NEXT: j .LBB8_418
+; RV64-NEXT: .LBB8_166: # %else464
+; RV64-NEXT: slli a1, a2, 35
+; RV64-NEXT: bgez a1, .LBB8_167
+; RV64-NEXT: j .LBB8_419
+; RV64-NEXT: .LBB8_167: # %else467
+; RV64-NEXT: slli a1, a2, 34
+; RV64-NEXT: bgez a1, .LBB8_168
+; RV64-NEXT: j .LBB8_420
+; RV64-NEXT: .LBB8_168: # %else470
+; RV64-NEXT: slli a1, a2, 33
+; RV64-NEXT: bgez a1, .LBB8_169
+; RV64-NEXT: j .LBB8_421
+; RV64-NEXT: .LBB8_169: # %else473
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: bgez a1, .LBB8_170
+; RV64-NEXT: j .LBB8_422
+; RV64-NEXT: .LBB8_170: # %else476
+; RV64-NEXT: slli a1, a2, 31
+; RV64-NEXT: bgez a1, .LBB8_171
+; RV64-NEXT: j .LBB8_423
+; RV64-NEXT: .LBB8_171: # %else479
+; RV64-NEXT: slli a1, a2, 30
+; RV64-NEXT: bgez a1, .LBB8_172
+; RV64-NEXT: j .LBB8_424
+; RV64-NEXT: .LBB8_172: # %else482
+; RV64-NEXT: slli a1, a2, 29
+; RV64-NEXT: bgez a1, .LBB8_173
+; RV64-NEXT: j .LBB8_425
+; RV64-NEXT: .LBB8_173: # %else485
+; RV64-NEXT: slli a1, a2, 28
+; RV64-NEXT: bgez a1, .LBB8_174
+; RV64-NEXT: j .LBB8_426
+; RV64-NEXT: .LBB8_174: # %else488
+; RV64-NEXT: slli a1, a2, 27
+; RV64-NEXT: bgez a1, .LBB8_175
+; RV64-NEXT: j .LBB8_427
+; RV64-NEXT: .LBB8_175: # %else491
+; RV64-NEXT: slli a1, a2, 26
+; RV64-NEXT: bgez a1, .LBB8_176
+; RV64-NEXT: j .LBB8_428
+; RV64-NEXT: .LBB8_176: # %else494
+; RV64-NEXT: slli a1, a2, 25
+; RV64-NEXT: bgez a1, .LBB8_177
+; RV64-NEXT: j .LBB8_429
+; RV64-NEXT: .LBB8_177: # %else497
+; RV64-NEXT: slli a1, a2, 24
+; RV64-NEXT: bgez a1, .LBB8_178
+; RV64-NEXT: j .LBB8_430
+; RV64-NEXT: .LBB8_178: # %else500
+; RV64-NEXT: slli a1, a2, 23
+; RV64-NEXT: bgez a1, .LBB8_179
+; RV64-NEXT: j .LBB8_431
+; RV64-NEXT: .LBB8_179: # %else503
+; RV64-NEXT: slli a1, a2, 22
+; RV64-NEXT: bgez a1, .LBB8_180
+; RV64-NEXT: j .LBB8_432
+; RV64-NEXT: .LBB8_180: # %else506
+; RV64-NEXT: slli a1, a2, 21
+; RV64-NEXT: bgez a1, .LBB8_181
+; RV64-NEXT: j .LBB8_433
+; RV64-NEXT: .LBB8_181: # %else509
+; RV64-NEXT: slli a1, a2, 20
+; RV64-NEXT: bgez a1, .LBB8_182
+; RV64-NEXT: j .LBB8_434
+; RV64-NEXT: .LBB8_182: # %else512
+; RV64-NEXT: slli a1, a2, 19
+; RV64-NEXT: bgez a1, .LBB8_183
+; RV64-NEXT: j .LBB8_435
+; RV64-NEXT: .LBB8_183: # %else515
+; RV64-NEXT: slli a1, a2, 18
+; RV64-NEXT: bgez a1, .LBB8_185
+; RV64-NEXT: .LBB8_184: # %cond.store517
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 0(a3)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_185: # %else518
+; RV64-NEXT: slli a3, a2, 17
+; RV64-NEXT: lui a1, 2
+; RV64-NEXT: addiw a1, a1, 318
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bgez a3, .LBB8_186
+; RV64-NEXT: j .LBB8_436
+; RV64-NEXT: .LBB8_186: # %else521
+; RV64-NEXT: slli a3, a2, 16
+; RV64-NEXT: bgez a3, .LBB8_187
+; RV64-NEXT: j .LBB8_437
+; RV64-NEXT: .LBB8_187: # %else524
+; RV64-NEXT: slli a3, a2, 15
+; RV64-NEXT: bgez a3, .LBB8_188
+; RV64-NEXT: j .LBB8_438
+; RV64-NEXT: .LBB8_188: # %else527
+; RV64-NEXT: slli a3, a2, 14
+; RV64-NEXT: bgez a3, .LBB8_189
+; RV64-NEXT: j .LBB8_439
+; RV64-NEXT: .LBB8_189: # %else530
+; RV64-NEXT: slli a3, a2, 13
+; RV64-NEXT: bgez a3, .LBB8_190
+; RV64-NEXT: j .LBB8_440
+; RV64-NEXT: .LBB8_190: # %else533
+; RV64-NEXT: slli a3, a2, 12
+; RV64-NEXT: bgez a3, .LBB8_191
+; RV64-NEXT: j .LBB8_441
+; RV64-NEXT: .LBB8_191: # %else536
+; RV64-NEXT: slli a3, a2, 11
+; RV64-NEXT: bgez a3, .LBB8_192
+; RV64-NEXT: j .LBB8_442
+; RV64-NEXT: .LBB8_192: # %else539
+; RV64-NEXT: slli a3, a2, 10
+; RV64-NEXT: bgez a3, .LBB8_193
+; RV64-NEXT: j .LBB8_443
+; RV64-NEXT: .LBB8_193: # %else542
+; RV64-NEXT: slli a3, a2, 9
+; RV64-NEXT: bgez a3, .LBB8_194
+; RV64-NEXT: j .LBB8_444
+; RV64-NEXT: .LBB8_194: # %else545
+; RV64-NEXT: slli a3, a2, 8
+; RV64-NEXT: bgez a3, .LBB8_195
+; RV64-NEXT: j .LBB8_445
+; RV64-NEXT: .LBB8_195: # %else548
+; RV64-NEXT: slli a3, a2, 7
+; RV64-NEXT: bgez a3, .LBB8_196
+; RV64-NEXT: j .LBB8_446
+; RV64-NEXT: .LBB8_196: # %else551
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: bgez a3, .LBB8_197
+; RV64-NEXT: j .LBB8_447
+; RV64-NEXT: .LBB8_197: # %else554
+; RV64-NEXT: slli a3, a2, 5
+; RV64-NEXT: bgez a3, .LBB8_198
+; RV64-NEXT: j .LBB8_448
+; RV64-NEXT: .LBB8_198: # %else557
+; RV64-NEXT: slli a3, a2, 4
+; RV64-NEXT: bgez a3, .LBB8_199
+; RV64-NEXT: j .LBB8_449
+; RV64-NEXT: .LBB8_199: # %else560
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: bgez a3, .LBB8_200
+; RV64-NEXT: j .LBB8_450
+; RV64-NEXT: .LBB8_200: # %else563
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: bgez a3, .LBB8_202
+; RV64-NEXT: .LBB8_201: # %cond.store565
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 127(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: .LBB8_202: # %else566
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: bgez a3, .LBB8_204
+; RV64-NEXT: # %bb.203: # %cond.store568
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_204: # %else569
+; RV64-NEXT: lui a1, 2
+; RV64-NEXT: addiw a1, a1, -1841
+; RV64-NEXT: add a3, sp, a1
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: bgez a2, .LBB8_205
+; RV64-NEXT: j .LBB8_451
+; RV64-NEXT: .LBB8_205: # %else572
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: beqz a2, .LBB8_206
+; RV64-NEXT: j .LBB8_452
+; RV64-NEXT: .LBB8_206: # %else575
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: beqz a2, .LBB8_207
+; RV64-NEXT: j .LBB8_453
+; RV64-NEXT: .LBB8_207: # %else578
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: beqz a2, .LBB8_208
+; RV64-NEXT: j .LBB8_454
+; RV64-NEXT: .LBB8_208: # %else581
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: beqz a2, .LBB8_209
+; RV64-NEXT: j .LBB8_455
+; RV64-NEXT: .LBB8_209: # %else584
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: beqz a2, .LBB8_210
+; RV64-NEXT: j .LBB8_456
+; RV64-NEXT: .LBB8_210: # %else587
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: beqz a2, .LBB8_211
+; RV64-NEXT: j .LBB8_457
+; RV64-NEXT: .LBB8_211: # %else590
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: beqz a2, .LBB8_212
+; RV64-NEXT: j .LBB8_458
+; RV64-NEXT: .LBB8_212: # %else593
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: beqz a2, .LBB8_213
+; RV64-NEXT: j .LBB8_459
+; RV64-NEXT: .LBB8_213: # %else596
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: beqz a2, .LBB8_214
+; RV64-NEXT: j .LBB8_460
+; RV64-NEXT: .LBB8_214: # %else599
+; RV64-NEXT: andi a2, a1, 512
+; RV64-NEXT: beqz a2, .LBB8_215
+; RV64-NEXT: j .LBB8_461
+; RV64-NEXT: .LBB8_215: # %else602
+; RV64-NEXT: andi a2, a1, 1024
+; RV64-NEXT: beqz a2, .LBB8_216
+; RV64-NEXT: j .LBB8_462
+; RV64-NEXT: .LBB8_216: # %else605
+; RV64-NEXT: slli a2, a1, 52
+; RV64-NEXT: bgez a2, .LBB8_217
+; RV64-NEXT: j .LBB8_463
+; RV64-NEXT: .LBB8_217: # %else608
+; RV64-NEXT: slli a2, a1, 51
+; RV64-NEXT: bgez a2, .LBB8_218
+; RV64-NEXT: j .LBB8_464
+; RV64-NEXT: .LBB8_218: # %else611
+; RV64-NEXT: slli a2, a1, 50
+; RV64-NEXT: bgez a2, .LBB8_219
+; RV64-NEXT: j .LBB8_465
+; RV64-NEXT: .LBB8_219: # %else614
+; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: bgez a2, .LBB8_220
+; RV64-NEXT: j .LBB8_466
+; RV64-NEXT: .LBB8_220: # %else617
+; RV64-NEXT: slli a2, a1, 48
+; RV64-NEXT: bgez a2, .LBB8_222
+; RV64-NEXT: .LBB8_221: # %cond.store619
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 0(a3)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_222: # %else620
+; RV64-NEXT: slli a3, a1, 47
+; RV64-NEXT: lui a2, 1
+; RV64-NEXT: addiw a2, a2, 96
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: bgez a3, .LBB8_223
+; RV64-NEXT: j .LBB8_467
+; RV64-NEXT: .LBB8_223: # %else623
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bgez a3, .LBB8_224
+; RV64-NEXT: j .LBB8_468
+; RV64-NEXT: .LBB8_224: # %else626
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bgez a3, .LBB8_225
+; RV64-NEXT: j .LBB8_469
+; RV64-NEXT: .LBB8_225: # %else629
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bgez a3, .LBB8_226
+; RV64-NEXT: j .LBB8_470
+; RV64-NEXT: .LBB8_226: # %else632
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bgez a3, .LBB8_227
+; RV64-NEXT: j .LBB8_471
+; RV64-NEXT: .LBB8_227: # %else635
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bgez a3, .LBB8_228
+; RV64-NEXT: j .LBB8_472
+; RV64-NEXT: .LBB8_228: # %else638
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bgez a3, .LBB8_229
+; RV64-NEXT: j .LBB8_473
+; RV64-NEXT: .LBB8_229: # %else641
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bgez a3, .LBB8_230
+; RV64-NEXT: j .LBB8_474
+; RV64-NEXT: .LBB8_230: # %else644
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bgez a3, .LBB8_231
+; RV64-NEXT: j .LBB8_475
+; RV64-NEXT: .LBB8_231: # %else647
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bgez a3, .LBB8_232
+; RV64-NEXT: j .LBB8_476
+; RV64-NEXT: .LBB8_232: # %else650
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: bgez a3, .LBB8_233
+; RV64-NEXT: j .LBB8_477
+; RV64-NEXT: .LBB8_233: # %else653
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bgez a3, .LBB8_234
+; RV64-NEXT: j .LBB8_478
+; RV64-NEXT: .LBB8_234: # %else656
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: bgez a3, .LBB8_235
+; RV64-NEXT: j .LBB8_479
+; RV64-NEXT: .LBB8_235: # %else659
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bgez a3, .LBB8_236
+; RV64-NEXT: j .LBB8_480
+; RV64-NEXT: .LBB8_236: # %else662
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bgez a3, .LBB8_237
+; RV64-NEXT: j .LBB8_481
+; RV64-NEXT: .LBB8_237: # %else665
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bgez a3, .LBB8_238
+; RV64-NEXT: j .LBB8_482
+; RV64-NEXT: .LBB8_238: # %else668
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bgez a3, .LBB8_240
+; RV64-NEXT: .LBB8_239: # %cond.store670
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_240: # %else671
+; RV64-NEXT: slli a3, a1, 30
+; RV64-NEXT: addi a2, sp, 2033
+; RV64-NEXT: bgez a3, .LBB8_241
+; RV64-NEXT: j .LBB8_483
+; RV64-NEXT: .LBB8_241: # %else674
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bgez a3, .LBB8_242
+; RV64-NEXT: j .LBB8_484
+; RV64-NEXT: .LBB8_242: # %else677
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bgez a3, .LBB8_243
+; RV64-NEXT: j .LBB8_485
+; RV64-NEXT: .LBB8_243: # %else680
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bgez a3, .LBB8_244
+; RV64-NEXT: j .LBB8_486
+; RV64-NEXT: .LBB8_244: # %else683
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bgez a3, .LBB8_245
+; RV64-NEXT: j .LBB8_487
+; RV64-NEXT: .LBB8_245: # %else686
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bgez a3, .LBB8_246
+; RV64-NEXT: j .LBB8_488
+; RV64-NEXT: .LBB8_246: # %else689
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bgez a3, .LBB8_247
+; RV64-NEXT: j .LBB8_489
+; RV64-NEXT: .LBB8_247: # %else692
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bgez a3, .LBB8_248
+; RV64-NEXT: j .LBB8_490
+; RV64-NEXT: .LBB8_248: # %else695
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bgez a3, .LBB8_249
+; RV64-NEXT: j .LBB8_491
+; RV64-NEXT: .LBB8_249: # %else698
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bgez a3, .LBB8_250
+; RV64-NEXT: j .LBB8_492
+; RV64-NEXT: .LBB8_250: # %else701
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bgez a3, .LBB8_251
+; RV64-NEXT: j .LBB8_493
+; RV64-NEXT: .LBB8_251: # %else704
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bgez a3, .LBB8_252
+; RV64-NEXT: j .LBB8_494
+; RV64-NEXT: .LBB8_252: # %else707
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: bgez a3, .LBB8_253
+; RV64-NEXT: j .LBB8_495
+; RV64-NEXT: .LBB8_253: # %else710
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bgez a3, .LBB8_254
+; RV64-NEXT: j .LBB8_496
+; RV64-NEXT: .LBB8_254: # %else713
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bgez a3, .LBB8_255
+; RV64-NEXT: j .LBB8_497
+; RV64-NEXT: .LBB8_255: # %else716
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bgez a3, .LBB8_256
+; RV64-NEXT: j .LBB8_498
+; RV64-NEXT: .LBB8_256: # %else719
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bgez a3, .LBB8_257
+; RV64-NEXT: j .LBB8_499
+; RV64-NEXT: .LBB8_257: # %else722
+; RV64-NEXT: slli a2, a1, 13
+; RV64-NEXT: bgez a2, .LBB8_258
+; RV64-NEXT: j .LBB8_500
+; RV64-NEXT: .LBB8_258: # %else725
+; RV64-NEXT: slli a2, a1, 12
+; RV64-NEXT: bgez a2, .LBB8_259
+; RV64-NEXT: j .LBB8_501
+; RV64-NEXT: .LBB8_259: # %else728
+; RV64-NEXT: slli a2, a1, 11
+; RV64-NEXT: bgez a2, .LBB8_260
+; RV64-NEXT: j .LBB8_502
+; RV64-NEXT: .LBB8_260: # %else731
+; RV64-NEXT: slli a2, a1, 10
+; RV64-NEXT: bgez a2, .LBB8_261
+; RV64-NEXT: j .LBB8_503
+; RV64-NEXT: .LBB8_261: # %else734
+; RV64-NEXT: slli a2, a1, 9
+; RV64-NEXT: bgez a2, .LBB8_262
+; RV64-NEXT: j .LBB8_504
+; RV64-NEXT: .LBB8_262: # %else737
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: bgez a2, .LBB8_263
+; RV64-NEXT: j .LBB8_505
+; RV64-NEXT: .LBB8_263: # %else740
+; RV64-NEXT: slli a2, a1, 7
+; RV64-NEXT: bgez a2, .LBB8_264
+; RV64-NEXT: j .LBB8_506
+; RV64-NEXT: .LBB8_264: # %else743
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: bgez a2, .LBB8_265
+; RV64-NEXT: j .LBB8_507
+; RV64-NEXT: .LBB8_265: # %else746
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: bgez a2, .LBB8_266
+; RV64-NEXT: j .LBB8_508
+; RV64-NEXT: .LBB8_266: # %else749
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: bgez a2, .LBB8_267
+; RV64-NEXT: j .LBB8_509
+; RV64-NEXT: .LBB8_267: # %else752
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: bgez a2, .LBB8_268
+; RV64-NEXT: j .LBB8_510
+; RV64-NEXT: .LBB8_268: # %else755
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: bgez a2, .LBB8_269
+; RV64-NEXT: j .LBB8_511
+; RV64-NEXT: .LBB8_269: # %else758
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: bgez a2, .LBB8_270
+; RV64-NEXT: j .LBB8_512
+; RV64-NEXT: .LBB8_270: # %else761
+; RV64-NEXT: bgez a1, .LBB8_272
+; RV64-NEXT: .LBB8_271: # %cond.store763
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: addi a2, sp, 128
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a2)
+; RV64-NEXT: lbu a1, 255(sp)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: .LBB8_272: # %else764
+; RV64-NEXT: lui a0, 6
+; RV64-NEXT: addiw a0, a0, 256
+; RV64-NEXT: sub sp, s0, a0
+; RV64-NEXT: lui a0, 6
+; RV64-NEXT: addiw a0, a0, -1776
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 2032
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB8_273: # %cond.store
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v16, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: beqz a1, .LBB8_2
+; RV64-NEXT: .LBB8_274: # %cond.store1
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: beqz a1, .LBB8_3
+; RV64-NEXT: .LBB8_275: # %cond.store4
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 2
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: beqz a1, .LBB8_4
+; RV64-NEXT: .LBB8_276: # %cond.store7
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 3
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: beqz a1, .LBB8_5
+; RV64-NEXT: .LBB8_277: # %cond.store10
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 4
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: beqz a1, .LBB8_6
+; RV64-NEXT: .LBB8_278: # %cond.store13
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 5
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: beqz a1, .LBB8_7
+; RV64-NEXT: .LBB8_279: # %cond.store16
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 6
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: beqz a1, .LBB8_8
+; RV64-NEXT: .LBB8_280: # %cond.store19
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 7
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: beqz a1, .LBB8_9
+; RV64-NEXT: .LBB8_281: # %cond.store22
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 8
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: beqz a1, .LBB8_10
+; RV64-NEXT: .LBB8_282: # %cond.store25
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 9
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: beqz a1, .LBB8_11
+; RV64-NEXT: .LBB8_283: # %cond.store28
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 10
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bgez a1, .LBB8_12
+; RV64-NEXT: .LBB8_284: # %cond.store31
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 11
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bgez a1, .LBB8_13
+; RV64-NEXT: .LBB8_285: # %cond.store34
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 12
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bgez a1, .LBB8_14
+; RV64-NEXT: .LBB8_286: # %cond.store37
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 13
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bgez a1, .LBB8_15
+; RV64-NEXT: .LBB8_287: # %cond.store40
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 14
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bgez a1, .LBB8_16
+; RV64-NEXT: .LBB8_288: # %cond.store43
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v16, 15
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 47
+; RV64-NEXT: bgez a1, .LBB8_17
+; RV64-NEXT: .LBB8_289: # %cond.store46
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 16
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 46
+; RV64-NEXT: bgez a1, .LBB8_18
+; RV64-NEXT: .LBB8_290: # %cond.store49
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 17
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 45
+; RV64-NEXT: bgez a1, .LBB8_19
+; RV64-NEXT: .LBB8_291: # %cond.store52
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 18
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 44
+; RV64-NEXT: bgez a1, .LBB8_20
+; RV64-NEXT: .LBB8_292: # %cond.store55
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 19
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 43
+; RV64-NEXT: bgez a1, .LBB8_21
+; RV64-NEXT: .LBB8_293: # %cond.store58
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 20
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 42
+; RV64-NEXT: bgez a1, .LBB8_22
+; RV64-NEXT: .LBB8_294: # %cond.store61
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 21
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 41
+; RV64-NEXT: bgez a1, .LBB8_23
+; RV64-NEXT: .LBB8_295: # %cond.store64
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 22
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 40
+; RV64-NEXT: bltz a1, .LBB8_296
+; RV64-NEXT: j .LBB8_24
+; RV64-NEXT: .LBB8_296: # %cond.store67
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 23
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 39
+; RV64-NEXT: bltz a1, .LBB8_297
+; RV64-NEXT: j .LBB8_25
+; RV64-NEXT: .LBB8_297: # %cond.store70
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 24
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 38
+; RV64-NEXT: bltz a1, .LBB8_298
+; RV64-NEXT: j .LBB8_26
+; RV64-NEXT: .LBB8_298: # %cond.store73
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 25
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 37
+; RV64-NEXT: bltz a1, .LBB8_299
+; RV64-NEXT: j .LBB8_27
+; RV64-NEXT: .LBB8_299: # %cond.store76
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 26
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 36
+; RV64-NEXT: bltz a1, .LBB8_300
+; RV64-NEXT: j .LBB8_28
+; RV64-NEXT: .LBB8_300: # %cond.store79
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 27
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 35
+; RV64-NEXT: bltz a1, .LBB8_301
+; RV64-NEXT: j .LBB8_29
+; RV64-NEXT: .LBB8_301: # %cond.store82
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 28
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 34
+; RV64-NEXT: bltz a1, .LBB8_302
+; RV64-NEXT: j .LBB8_30
+; RV64-NEXT: .LBB8_302: # %cond.store85
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 29
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 33
+; RV64-NEXT: bltz a1, .LBB8_303
+; RV64-NEXT: j .LBB8_31
+; RV64-NEXT: .LBB8_303: # %cond.store88
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 30
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: bgez a1, .LBB8_513
+; RV64-NEXT: j .LBB8_32
+; RV64-NEXT: .LBB8_513: # %cond.store88
+; RV64-NEXT: j .LBB8_33
+; RV64-NEXT: .LBB8_304: # %cond.store94
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1016(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 30
+; RV64-NEXT: bltz a3, .LBB8_305
+; RV64-NEXT: j .LBB8_35
+; RV64-NEXT: .LBB8_305: # %cond.store97
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 889(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 29
+; RV64-NEXT: bltz a3, .LBB8_306
+; RV64-NEXT: j .LBB8_36
+; RV64-NEXT: .LBB8_306: # %cond.store100
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 762(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 28
+; RV64-NEXT: bltz a3, .LBB8_307
+; RV64-NEXT: j .LBB8_37
+; RV64-NEXT: .LBB8_307: # %cond.store103
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 635(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 27
+; RV64-NEXT: bltz a3, .LBB8_308
+; RV64-NEXT: j .LBB8_38
+; RV64-NEXT: .LBB8_308: # %cond.store106
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -512
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 508(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 26
+; RV64-NEXT: bltz a3, .LBB8_309
+; RV64-NEXT: j .LBB8_39
+; RV64-NEXT: .LBB8_309: # %cond.store109
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 381(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 25
+; RV64-NEXT: bltz a3, .LBB8_310
+; RV64-NEXT: j .LBB8_40
+; RV64-NEXT: .LBB8_310: # %cond.store112
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 254(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 24
+; RV64-NEXT: bltz a3, .LBB8_311
+; RV64-NEXT: j .LBB8_41
+; RV64-NEXT: .LBB8_311: # %cond.store115
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 127(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 23
+; RV64-NEXT: bgez a3, .LBB8_514
+; RV64-NEXT: j .LBB8_42
+; RV64-NEXT: .LBB8_514: # %cond.store115
+; RV64-NEXT: j .LBB8_43
+; RV64-NEXT: .LBB8_312: # %cond.store121
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 2032(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 21
+; RV64-NEXT: bltz a3, .LBB8_313
+; RV64-NEXT: j .LBB8_45
+; RV64-NEXT: .LBB8_313: # %cond.store124
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1905(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 20
+; RV64-NEXT: bltz a3, .LBB8_314
+; RV64-NEXT: j .LBB8_46
+; RV64-NEXT: .LBB8_314: # %cond.store127
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1778(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 19
+; RV64-NEXT: bltz a3, .LBB8_315
+; RV64-NEXT: j .LBB8_47
+; RV64-NEXT: .LBB8_315: # %cond.store130
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1536
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1651(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 18
+; RV64-NEXT: bltz a3, .LBB8_316
+; RV64-NEXT: j .LBB8_48
+; RV64-NEXT: .LBB8_316: # %cond.store133
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1524(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 17
+; RV64-NEXT: bltz a3, .LBB8_317
+; RV64-NEXT: j .LBB8_49
+; RV64-NEXT: .LBB8_317: # %cond.store136
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1397(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 16
+; RV64-NEXT: bltz a3, .LBB8_318
+; RV64-NEXT: j .LBB8_50
+; RV64-NEXT: .LBB8_318: # %cond.store139
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 6
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1270(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 15
+; RV64-NEXT: bltz a3, .LBB8_319
+; RV64-NEXT: j .LBB8_51
+; RV64-NEXT: .LBB8_319: # %cond.store142
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1143(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 14
+; RV64-NEXT: bltz a3, .LBB8_320
+; RV64-NEXT: j .LBB8_52
+; RV64-NEXT: .LBB8_320: # %cond.store145
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1016(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 13
+; RV64-NEXT: bltz a3, .LBB8_321
+; RV64-NEXT: j .LBB8_53
+; RV64-NEXT: .LBB8_321: # %cond.store148
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 889(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a3, a2, 12
+; RV64-NEXT: bltz a3, .LBB8_322
+; RV64-NEXT: j .LBB8_54
+; RV64-NEXT: .LBB8_322: # %cond.store151
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 762(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 11
+; RV64-NEXT: bltz a3, .LBB8_323
+; RV64-NEXT: j .LBB8_55
+; RV64-NEXT: .LBB8_323: # %cond.store154
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1536
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 635(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 10
+; RV64-NEXT: bltz a3, .LBB8_324
+; RV64-NEXT: j .LBB8_56
+; RV64-NEXT: .LBB8_324: # %cond.store157
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 508(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 9
+; RV64-NEXT: bltz a3, .LBB8_325
+; RV64-NEXT: j .LBB8_57
+; RV64-NEXT: .LBB8_325: # %cond.store160
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 381(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 8
+; RV64-NEXT: bltz a3, .LBB8_326
+; RV64-NEXT: j .LBB8_58
+; RV64-NEXT: .LBB8_326: # %cond.store163
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 254(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 7
+; RV64-NEXT: bltz a3, .LBB8_327
+; RV64-NEXT: j .LBB8_59
+; RV64-NEXT: .LBB8_327: # %cond.store166
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 21
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 127(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: bgez a3, .LBB8_515
+; RV64-NEXT: j .LBB8_60
+; RV64-NEXT: .LBB8_515: # %cond.store166
+; RV64-NEXT: j .LBB8_61
+; RV64-NEXT: .LBB8_328: # %cond.store172
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 2032(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 4
+; RV64-NEXT: bltz a1, .LBB8_329
+; RV64-NEXT: j .LBB8_63
+; RV64-NEXT: .LBB8_329: # %cond.store175
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 1905(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 3
+; RV64-NEXT: bltz a1, .LBB8_330
+; RV64-NEXT: j .LBB8_64
+; RV64-NEXT: .LBB8_330: # %cond.store178
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 512
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 1778(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 2
+; RV64-NEXT: bgez a1, .LBB8_516
+; RV64-NEXT: j .LBB8_65
+; RV64-NEXT: .LBB8_516: # %cond.store178
+; RV64-NEXT: j .LBB8_66
+; RV64-NEXT: .LBB8_331: # %cond.store187
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 1397(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: bnez a2, .LBB8_332
+; RV64-NEXT: j .LBB8_70
+; RV64-NEXT: .LBB8_332: # %cond.store190
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 1270(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: bnez a2, .LBB8_333
+; RV64-NEXT: j .LBB8_71
+; RV64-NEXT: .LBB8_333: # %cond.store193
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 1143(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: bnez a2, .LBB8_334
+; RV64-NEXT: j .LBB8_72
+; RV64-NEXT: .LBB8_334: # %cond.store196
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 1016(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: bnez a2, .LBB8_335
+; RV64-NEXT: j .LBB8_73
+; RV64-NEXT: .LBB8_335: # %cond.store199
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 889(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: bnez a2, .LBB8_336
+; RV64-NEXT: j .LBB8_74
+; RV64-NEXT: .LBB8_336: # %cond.store202
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -512
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 762(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: bnez a2, .LBB8_337
+; RV64-NEXT: j .LBB8_75
+; RV64-NEXT: .LBB8_337: # %cond.store205
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 635(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: bnez a2, .LBB8_338
+; RV64-NEXT: j .LBB8_76
+; RV64-NEXT: .LBB8_338: # %cond.store208
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 508(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: bnez a2, .LBB8_339
+; RV64-NEXT: j .LBB8_77
+; RV64-NEXT: .LBB8_339: # %cond.store211
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 381(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: bnez a2, .LBB8_340
+; RV64-NEXT: j .LBB8_78
+; RV64-NEXT: .LBB8_340: # %cond.store214
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 19
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 254(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 512
+; RV64-NEXT: bnez a2, .LBB8_341
+; RV64-NEXT: j .LBB8_79
+; RV64-NEXT: .LBB8_341: # %cond.store217
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a2, 127(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 1024
+; RV64-NEXT: beqz a2, .LBB8_517
+; RV64-NEXT: j .LBB8_80
+; RV64-NEXT: .LBB8_517: # %cond.store217
+; RV64-NEXT: j .LBB8_81
+; RV64-NEXT: .LBB8_342: # %cond.store223
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 2032(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 51
+; RV64-NEXT: bltz a3, .LBB8_343
+; RV64-NEXT: j .LBB8_83
+; RV64-NEXT: .LBB8_343: # %cond.store226
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1536
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1905(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 50
+; RV64-NEXT: bltz a3, .LBB8_344
+; RV64-NEXT: j .LBB8_84
+; RV64-NEXT: .LBB8_344: # %cond.store229
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1778(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 49
+; RV64-NEXT: bltz a3, .LBB8_345
+; RV64-NEXT: j .LBB8_85
+; RV64-NEXT: .LBB8_345: # %cond.store232
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1651(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 48
+; RV64-NEXT: bltz a3, .LBB8_346
+; RV64-NEXT: j .LBB8_86
+; RV64-NEXT: .LBB8_346: # %cond.store235
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1524(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 47
+; RV64-NEXT: bltz a3, .LBB8_347
+; RV64-NEXT: j .LBB8_87
+; RV64-NEXT: .LBB8_347: # %cond.store238
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 9
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1397(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bltz a3, .LBB8_348
+; RV64-NEXT: j .LBB8_88
+; RV64-NEXT: .LBB8_348: # %cond.store241
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1270(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bltz a3, .LBB8_349
+; RV64-NEXT: j .LBB8_89
+; RV64-NEXT: .LBB8_349: # %cond.store244
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1143(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bltz a3, .LBB8_350
+; RV64-NEXT: j .LBB8_90
+; RV64-NEXT: .LBB8_350: # %cond.store247
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1016(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bltz a3, .LBB8_351
+; RV64-NEXT: j .LBB8_91
+; RV64-NEXT: .LBB8_351: # %cond.store250
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1536
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 889(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bltz a3, .LBB8_352
+; RV64-NEXT: j .LBB8_92
+; RV64-NEXT: .LBB8_352: # %cond.store253
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 762(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bltz a3, .LBB8_353
+; RV64-NEXT: j .LBB8_93
+; RV64-NEXT: .LBB8_353: # %cond.store256
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 635(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bltz a3, .LBB8_354
+; RV64-NEXT: j .LBB8_94
+; RV64-NEXT: .LBB8_354: # %cond.store259
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 508(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bltz a3, .LBB8_355
+; RV64-NEXT: j .LBB8_95
+; RV64-NEXT: .LBB8_355: # %cond.store262
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 17
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 381(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bltz a3, .LBB8_356
+; RV64-NEXT: j .LBB8_96
+; RV64-NEXT: .LBB8_356: # %cond.store265
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 254(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: bltz a3, .LBB8_357
+; RV64-NEXT: j .LBB8_97
+; RV64-NEXT: .LBB8_357: # %cond.store268
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 127(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bgez a3, .LBB8_518
+; RV64-NEXT: j .LBB8_98
+; RV64-NEXT: .LBB8_518: # %cond.store268
+; RV64-NEXT: j .LBB8_99
+; RV64-NEXT: .LBB8_358: # %cond.store274
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 512
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 2032(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bltz a3, .LBB8_359
+; RV64-NEXT: j .LBB8_101
+; RV64-NEXT: .LBB8_359: # %cond.store277
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1905(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bltz a3, .LBB8_360
+; RV64-NEXT: j .LBB8_102
+; RV64-NEXT: .LBB8_360: # %cond.store280
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1778(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bltz a3, .LBB8_361
+; RV64-NEXT: j .LBB8_103
+; RV64-NEXT: .LBB8_361: # %cond.store283
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1651(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bltz a3, .LBB8_362
+; RV64-NEXT: j .LBB8_104
+; RV64-NEXT: .LBB8_362: # %cond.store286
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1524(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 30
+; RV64-NEXT: bltz a3, .LBB8_363
+; RV64-NEXT: j .LBB8_105
+; RV64-NEXT: .LBB8_363: # %cond.store289
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1397(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bltz a3, .LBB8_364
+; RV64-NEXT: j .LBB8_106
+; RV64-NEXT: .LBB8_364: # %cond.store292
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1270(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bltz a3, .LBB8_365
+; RV64-NEXT: j .LBB8_107
+; RV64-NEXT: .LBB8_365: # %cond.store295
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1143(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bltz a3, .LBB8_366
+; RV64-NEXT: j .LBB8_108
+; RV64-NEXT: .LBB8_366: # %cond.store298
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 31
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1016(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bltz a3, .LBB8_367
+; RV64-NEXT: j .LBB8_109
+; RV64-NEXT: .LBB8_367: # %cond.store301
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 889(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bltz a3, .LBB8_368
+; RV64-NEXT: j .LBB8_110
+; RV64-NEXT: .LBB8_368: # %cond.store304
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 762(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bltz a3, .LBB8_369
+; RV64-NEXT: j .LBB8_111
+; RV64-NEXT: .LBB8_369: # %cond.store307
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 635(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bltz a3, .LBB8_370
+; RV64-NEXT: j .LBB8_112
+; RV64-NEXT: .LBB8_370: # %cond.store310
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 15
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 508(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bltz a3, .LBB8_371
+; RV64-NEXT: j .LBB8_113
+; RV64-NEXT: .LBB8_371: # %cond.store313
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 381(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bltz a3, .LBB8_372
+; RV64-NEXT: j .LBB8_114
+; RV64-NEXT: .LBB8_372: # %cond.store316
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 254(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bltz a3, .LBB8_373
+; RV64-NEXT: j .LBB8_115
+; RV64-NEXT: .LBB8_373: # %cond.store319
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 127(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bgez a3, .LBB8_519
+; RV64-NEXT: j .LBB8_116
+; RV64-NEXT: .LBB8_519: # %cond.store319
+; RV64-NEXT: j .LBB8_117
+; RV64-NEXT: .LBB8_374: # %cond.store325
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 2032(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bltz a3, .LBB8_375
+; RV64-NEXT: j .LBB8_119
+; RV64-NEXT: .LBB8_375: # %cond.store328
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1905(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bltz a3, .LBB8_376
+; RV64-NEXT: j .LBB8_120
+; RV64-NEXT: .LBB8_376: # %cond.store331
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 4
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1778(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bltz a3, .LBB8_377
+; RV64-NEXT: j .LBB8_121
+; RV64-NEXT: .LBB8_377: # %cond.store334
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 7
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1651(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bltz a3, .LBB8_378
+; RV64-NEXT: j .LBB8_122
+; RV64-NEXT: .LBB8_378: # %cond.store337
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1524(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 13
+; RV64-NEXT: bltz a3, .LBB8_379
+; RV64-NEXT: j .LBB8_123
+; RV64-NEXT: .LBB8_379: # %cond.store340
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1397(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 12
+; RV64-NEXT: bltz a3, .LBB8_380
+; RV64-NEXT: j .LBB8_124
+; RV64-NEXT: .LBB8_380: # %cond.store343
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1270(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 11
+; RV64-NEXT: bltz a3, .LBB8_381
+; RV64-NEXT: j .LBB8_125
+; RV64-NEXT: .LBB8_381: # %cond.store346
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 27
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1143(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 10
+; RV64-NEXT: bltz a3, .LBB8_382
+; RV64-NEXT: j .LBB8_126
+; RV64-NEXT: .LBB8_382: # %cond.store349
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 1016(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 9
+; RV64-NEXT: bltz a3, .LBB8_383
+; RV64-NEXT: j .LBB8_127
+; RV64-NEXT: .LBB8_383: # %cond.store352
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 889(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 8
+; RV64-NEXT: bltz a3, .LBB8_384
+; RV64-NEXT: j .LBB8_128
+; RV64-NEXT: .LBB8_384: # %cond.store355
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 762(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 7
+; RV64-NEXT: bltz a3, .LBB8_385
+; RV64-NEXT: j .LBB8_129
+; RV64-NEXT: .LBB8_385: # %cond.store358
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 13
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 635(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 6
+; RV64-NEXT: bltz a3, .LBB8_386
+; RV64-NEXT: j .LBB8_130
+; RV64-NEXT: .LBB8_386: # %cond.store361
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 508(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 5
+; RV64-NEXT: bltz a3, .LBB8_387
+; RV64-NEXT: j .LBB8_131
+; RV64-NEXT: .LBB8_387: # %cond.store364
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 381(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 4
+; RV64-NEXT: bltz a3, .LBB8_388
+; RV64-NEXT: j .LBB8_132
+; RV64-NEXT: .LBB8_388: # %cond.store367
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 254(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 3
+; RV64-NEXT: bltz a3, .LBB8_389
+; RV64-NEXT: j .LBB8_133
+; RV64-NEXT: .LBB8_389: # %cond.store370
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 25
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a3, 127(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 2
+; RV64-NEXT: bgez a3, .LBB8_520
+; RV64-NEXT: j .LBB8_134
+; RV64-NEXT: .LBB8_520: # %cond.store370
+; RV64-NEXT: j .LBB8_135
+; RV64-NEXT: .LBB8_390: # %cond.store379
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 1874(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a1, a2, 1
+; RV64-NEXT: bnez a1, .LBB8_391
+; RV64-NEXT: j .LBB8_139
+; RV64-NEXT: .LBB8_391: # %cond.store382
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v24, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: bnez a1, .LBB8_392
+; RV64-NEXT: j .LBB8_140
+; RV64-NEXT: .LBB8_392: # %cond.store385
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 1
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: bnez a1, .LBB8_393
+; RV64-NEXT: j .LBB8_141
+; RV64-NEXT: .LBB8_393: # %cond.store388
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 2
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: bnez a1, .LBB8_394
+; RV64-NEXT: j .LBB8_142
+; RV64-NEXT: .LBB8_394: # %cond.store391
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 3
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: bnez a1, .LBB8_395
+; RV64-NEXT: j .LBB8_143
+; RV64-NEXT: .LBB8_395: # %cond.store394
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 4
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: bnez a1, .LBB8_396
+; RV64-NEXT: j .LBB8_144
+; RV64-NEXT: .LBB8_396: # %cond.store397
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 5
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: bnez a1, .LBB8_397
+; RV64-NEXT: j .LBB8_145
+; RV64-NEXT: .LBB8_397: # %cond.store400
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 6
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: bnez a1, .LBB8_398
+; RV64-NEXT: j .LBB8_146
+; RV64-NEXT: .LBB8_398: # %cond.store403
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 7
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: bnez a1, .LBB8_399
+; RV64-NEXT: j .LBB8_147
+; RV64-NEXT: .LBB8_399: # %cond.store406
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 8
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: bnez a1, .LBB8_400
+; RV64-NEXT: j .LBB8_148
+; RV64-NEXT: .LBB8_400: # %cond.store409
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 9
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: bnez a1, .LBB8_401
+; RV64-NEXT: j .LBB8_149
+; RV64-NEXT: .LBB8_401: # %cond.store412
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 10
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bltz a1, .LBB8_402
+; RV64-NEXT: j .LBB8_150
+; RV64-NEXT: .LBB8_402: # %cond.store415
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 11
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bltz a1, .LBB8_403
+; RV64-NEXT: j .LBB8_151
+; RV64-NEXT: .LBB8_403: # %cond.store418
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 12
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bltz a1, .LBB8_404
+; RV64-NEXT: j .LBB8_152
+; RV64-NEXT: .LBB8_404: # %cond.store421
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 13
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bltz a1, .LBB8_405
+; RV64-NEXT: j .LBB8_153
+; RV64-NEXT: .LBB8_405: # %cond.store424
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 14
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bltz a1, .LBB8_406
+; RV64-NEXT: j .LBB8_154
+; RV64-NEXT: .LBB8_406: # %cond.store427
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v24, 15
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 47
+; RV64-NEXT: bltz a1, .LBB8_407
+; RV64-NEXT: j .LBB8_155
+; RV64-NEXT: .LBB8_407: # %cond.store430
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 16
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 46
+; RV64-NEXT: bltz a1, .LBB8_408
+; RV64-NEXT: j .LBB8_156
+; RV64-NEXT: .LBB8_408: # %cond.store433
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 17
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 45
+; RV64-NEXT: bltz a1, .LBB8_409
+; RV64-NEXT: j .LBB8_157
+; RV64-NEXT: .LBB8_409: # %cond.store436
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 18
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 44
+; RV64-NEXT: bltz a1, .LBB8_410
+; RV64-NEXT: j .LBB8_158
+; RV64-NEXT: .LBB8_410: # %cond.store439
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 19
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 43
+; RV64-NEXT: bltz a1, .LBB8_411
+; RV64-NEXT: j .LBB8_159
+; RV64-NEXT: .LBB8_411: # %cond.store442
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 20
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 42
+; RV64-NEXT: bltz a1, .LBB8_412
+; RV64-NEXT: j .LBB8_160
+; RV64-NEXT: .LBB8_412: # %cond.store445
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 21
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 41
+; RV64-NEXT: bltz a1, .LBB8_413
+; RV64-NEXT: j .LBB8_161
+; RV64-NEXT: .LBB8_413: # %cond.store448
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 22
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 40
+; RV64-NEXT: bltz a1, .LBB8_414
+; RV64-NEXT: j .LBB8_162
+; RV64-NEXT: .LBB8_414: # %cond.store451
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 23
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 39
+; RV64-NEXT: bltz a1, .LBB8_415
+; RV64-NEXT: j .LBB8_163
+; RV64-NEXT: .LBB8_415: # %cond.store454
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 24
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 38
+; RV64-NEXT: bltz a1, .LBB8_416
+; RV64-NEXT: j .LBB8_164
+; RV64-NEXT: .LBB8_416: # %cond.store457
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 25
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 37
+; RV64-NEXT: bltz a1, .LBB8_417
+; RV64-NEXT: j .LBB8_165
+; RV64-NEXT: .LBB8_417: # %cond.store460
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 26
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 36
+; RV64-NEXT: bltz a1, .LBB8_418
+; RV64-NEXT: j .LBB8_166
+; RV64-NEXT: .LBB8_418: # %cond.store463
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 27
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 35
+; RV64-NEXT: bltz a1, .LBB8_419
+; RV64-NEXT: j .LBB8_167
+; RV64-NEXT: .LBB8_419: # %cond.store466
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 28
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 34
+; RV64-NEXT: bltz a1, .LBB8_420
+; RV64-NEXT: j .LBB8_168
+; RV64-NEXT: .LBB8_420: # %cond.store469
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 29
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 33
+; RV64-NEXT: bltz a1, .LBB8_421
+; RV64-NEXT: j .LBB8_169
+; RV64-NEXT: .LBB8_421: # %cond.store472
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 30
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: bltz a1, .LBB8_422
+; RV64-NEXT: j .LBB8_170
+; RV64-NEXT: .LBB8_422: # %cond.store475
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v24, 31
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: addi a1, a0, 1
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: slli a1, a2, 31
+; RV64-NEXT: bltz a1, .LBB8_423
+; RV64-NEXT: j .LBB8_171
+; RV64-NEXT: .LBB8_423: # %cond.store478
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 1651(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 30
+; RV64-NEXT: bltz a1, .LBB8_424
+; RV64-NEXT: j .LBB8_172
+; RV64-NEXT: .LBB8_424: # %cond.store481
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 1524(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 29
+; RV64-NEXT: bltz a1, .LBB8_425
+; RV64-NEXT: j .LBB8_173
+; RV64-NEXT: .LBB8_425: # %cond.store484
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 1397(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 28
+; RV64-NEXT: bltz a1, .LBB8_426
+; RV64-NEXT: j .LBB8_174
+; RV64-NEXT: .LBB8_426: # %cond.store487
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 1270(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 27
+; RV64-NEXT: bltz a1, .LBB8_427
+; RV64-NEXT: j .LBB8_175
+; RV64-NEXT: .LBB8_427: # %cond.store490
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 1143(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 26
+; RV64-NEXT: bltz a1, .LBB8_428
+; RV64-NEXT: j .LBB8_176
+; RV64-NEXT: .LBB8_428: # %cond.store493
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 1016(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 25
+; RV64-NEXT: bltz a1, .LBB8_429
+; RV64-NEXT: j .LBB8_177
+; RV64-NEXT: .LBB8_429: # %cond.store496
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 889(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 24
+; RV64-NEXT: bltz a1, .LBB8_430
+; RV64-NEXT: j .LBB8_178
+; RV64-NEXT: .LBB8_430: # %cond.store499
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 762(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 23
+; RV64-NEXT: bltz a1, .LBB8_431
+; RV64-NEXT: j .LBB8_179
+; RV64-NEXT: .LBB8_431: # %cond.store502
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 635(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 22
+; RV64-NEXT: bltz a1, .LBB8_432
+; RV64-NEXT: j .LBB8_180
+; RV64-NEXT: .LBB8_432: # %cond.store505
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 508(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 21
+; RV64-NEXT: bltz a1, .LBB8_433
+; RV64-NEXT: j .LBB8_181
+; RV64-NEXT: .LBB8_433: # %cond.store508
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 381(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 20
+; RV64-NEXT: bltz a1, .LBB8_434
+; RV64-NEXT: j .LBB8_182
+; RV64-NEXT: .LBB8_434: # %cond.store511
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 254(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 19
+; RV64-NEXT: bltz a1, .LBB8_435
+; RV64-NEXT: j .LBB8_183
+; RV64-NEXT: .LBB8_435: # %cond.store514
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: li a4, 21
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a1, 127(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a1, a2, 18
+; RV64-NEXT: bgez a1, .LBB8_521
+; RV64-NEXT: j .LBB8_184
+; RV64-NEXT: .LBB8_521: # %cond.store514
+; RV64-NEXT: j .LBB8_185
+; RV64-NEXT: .LBB8_436: # %cond.store520
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 2032(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 16
+; RV64-NEXT: bltz a3, .LBB8_437
+; RV64-NEXT: j .LBB8_187
+; RV64-NEXT: .LBB8_437: # %cond.store523
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1905(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 15
+; RV64-NEXT: bltz a3, .LBB8_438
+; RV64-NEXT: j .LBB8_188
+; RV64-NEXT: .LBB8_438: # %cond.store526
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 5
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1778(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 14
+; RV64-NEXT: bltz a3, .LBB8_439
+; RV64-NEXT: j .LBB8_189
+; RV64-NEXT: .LBB8_439: # %cond.store529
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1651(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 13
+; RV64-NEXT: bltz a3, .LBB8_440
+; RV64-NEXT: j .LBB8_190
+; RV64-NEXT: .LBB8_440: # %cond.store532
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1524(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 12
+; RV64-NEXT: bltz a3, .LBB8_441
+; RV64-NEXT: j .LBB8_191
+; RV64-NEXT: .LBB8_441: # %cond.store535
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1397(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 11
+; RV64-NEXT: bltz a3, .LBB8_442
+; RV64-NEXT: j .LBB8_192
+; RV64-NEXT: .LBB8_442: # %cond.store538
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 19
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1270(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 10
+; RV64-NEXT: bltz a3, .LBB8_443
+; RV64-NEXT: j .LBB8_193
+; RV64-NEXT: .LBB8_443: # %cond.store541
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1143(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 9
+; RV64-NEXT: bltz a3, .LBB8_444
+; RV64-NEXT: j .LBB8_194
+; RV64-NEXT: .LBB8_444: # %cond.store544
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1016(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 8
+; RV64-NEXT: bltz a3, .LBB8_445
+; RV64-NEXT: j .LBB8_195
+; RV64-NEXT: .LBB8_445: # %cond.store547
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 889(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 7
+; RV64-NEXT: bltz a3, .LBB8_446
+; RV64-NEXT: j .LBB8_196
+; RV64-NEXT: .LBB8_446: # %cond.store550
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 9
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 762(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: bltz a3, .LBB8_447
+; RV64-NEXT: j .LBB8_197
+; RV64-NEXT: .LBB8_447: # %cond.store553
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 635(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 5
+; RV64-NEXT: bltz a3, .LBB8_448
+; RV64-NEXT: j .LBB8_198
+; RV64-NEXT: .LBB8_448: # %cond.store556
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 508(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 4
+; RV64-NEXT: bltz a3, .LBB8_449
+; RV64-NEXT: j .LBB8_199
+; RV64-NEXT: .LBB8_449: # %cond.store559
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 381(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: bltz a3, .LBB8_450
+; RV64-NEXT: j .LBB8_200
+; RV64-NEXT: .LBB8_450: # %cond.store562
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 17
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 254(a1)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: bgez a3, .LBB8_522
+; RV64-NEXT: j .LBB8_201
+; RV64-NEXT: .LBB8_522: # %cond.store562
+; RV64-NEXT: j .LBB8_202
+; RV64-NEXT: .LBB8_451: # %cond.store571
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 2032(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: bnez a2, .LBB8_452
+; RV64-NEXT: j .LBB8_206
+; RV64-NEXT: .LBB8_452: # %cond.store574
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1905(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: bnez a2, .LBB8_453
+; RV64-NEXT: j .LBB8_207
+; RV64-NEXT: .LBB8_453: # %cond.store577
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1778(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: bnez a2, .LBB8_454
+; RV64-NEXT: j .LBB8_208
+; RV64-NEXT: .LBB8_454: # %cond.store580
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 31
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1651(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: bnez a2, .LBB8_455
+; RV64-NEXT: j .LBB8_209
+; RV64-NEXT: .LBB8_455: # %cond.store583
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1524(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: bnez a2, .LBB8_456
+; RV64-NEXT: j .LBB8_210
+; RV64-NEXT: .LBB8_456: # %cond.store586
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 15
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1397(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: bnez a2, .LBB8_457
+; RV64-NEXT: j .LBB8_211
+; RV64-NEXT: .LBB8_457: # %cond.store589
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1270(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: bnez a2, .LBB8_458
+; RV64-NEXT: j .LBB8_212
+; RV64-NEXT: .LBB8_458: # %cond.store592
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 29
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1143(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: bnez a2, .LBB8_459
+; RV64-NEXT: j .LBB8_213
+; RV64-NEXT: .LBB8_459: # %cond.store595
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 1016(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: bnez a2, .LBB8_460
+; RV64-NEXT: j .LBB8_214
+; RV64-NEXT: .LBB8_460: # %cond.store598
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 7
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 889(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 512
+; RV64-NEXT: bnez a2, .LBB8_461
+; RV64-NEXT: j .LBB8_215
+; RV64-NEXT: .LBB8_461: # %cond.store601
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 762(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: andi a2, a1, 1024
+; RV64-NEXT: bnez a2, .LBB8_462
+; RV64-NEXT: j .LBB8_216
+; RV64-NEXT: .LBB8_462: # %cond.store604
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 27
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 635(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a2, a1, 52
+; RV64-NEXT: bltz a2, .LBB8_463
+; RV64-NEXT: j .LBB8_217
+; RV64-NEXT: .LBB8_463: # %cond.store607
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 508(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a2, a1, 51
+; RV64-NEXT: bltz a2, .LBB8_464
+; RV64-NEXT: j .LBB8_218
+; RV64-NEXT: .LBB8_464: # %cond.store610
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 13
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 381(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a2, a1, 50
+; RV64-NEXT: bltz a2, .LBB8_465
+; RV64-NEXT: j .LBB8_219
+; RV64-NEXT: .LBB8_465: # %cond.store613
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 254(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: bltz a2, .LBB8_466
+; RV64-NEXT: j .LBB8_220
+; RV64-NEXT: .LBB8_466: # %cond.store616
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: li a4, 25
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 127(a3)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a2, a1, 48
+; RV64-NEXT: bgez a2, .LBB8_523
+; RV64-NEXT: j .LBB8_221
+; RV64-NEXT: .LBB8_523: # %cond.store616
+; RV64-NEXT: j .LBB8_222
+; RV64-NEXT: .LBB8_467: # %cond.store622
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 3
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 2032(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bltz a3, .LBB8_468
+; RV64-NEXT: j .LBB8_224
+; RV64-NEXT: .LBB8_468: # %cond.store625
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1905(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bltz a3, .LBB8_469
+; RV64-NEXT: j .LBB8_225
+; RV64-NEXT: .LBB8_469: # %cond.store628
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1778(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bltz a3, .LBB8_470
+; RV64-NEXT: j .LBB8_226
+; RV64-NEXT: .LBB8_470: # %cond.store631
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1651(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bltz a3, .LBB8_471
+; RV64-NEXT: j .LBB8_227
+; RV64-NEXT: .LBB8_471: # %cond.store634
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1524(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bltz a3, .LBB8_472
+; RV64-NEXT: j .LBB8_228
+; RV64-NEXT: .LBB8_472: # %cond.store637
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1397(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bltz a3, .LBB8_473
+; RV64-NEXT: j .LBB8_229
+; RV64-NEXT: .LBB8_473: # %cond.store640
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 21
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1270(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bltz a3, .LBB8_474
+; RV64-NEXT: j .LBB8_230
+; RV64-NEXT: .LBB8_474: # %cond.store643
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1143(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bltz a3, .LBB8_475
+; RV64-NEXT: j .LBB8_231
+; RV64-NEXT: .LBB8_475: # %cond.store646
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 5
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1016(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bltz a3, .LBB8_476
+; RV64-NEXT: j .LBB8_232
+; RV64-NEXT: .LBB8_476: # %cond.store649
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 889(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: bltz a3, .LBB8_477
+; RV64-NEXT: j .LBB8_233
+; RV64-NEXT: .LBB8_477: # %cond.store652
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 19
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 762(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bltz a3, .LBB8_478
+; RV64-NEXT: j .LBB8_234
+; RV64-NEXT: .LBB8_478: # %cond.store655
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 635(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: bltz a3, .LBB8_479
+; RV64-NEXT: j .LBB8_235
+; RV64-NEXT: .LBB8_479: # %cond.store658
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 9
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 508(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bltz a3, .LBB8_480
+; RV64-NEXT: j .LBB8_236
+; RV64-NEXT: .LBB8_480: # %cond.store661
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 381(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bltz a3, .LBB8_481
+; RV64-NEXT: j .LBB8_237
+; RV64-NEXT: .LBB8_481: # %cond.store664
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 17
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 254(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bltz a3, .LBB8_482
+; RV64-NEXT: j .LBB8_238
+; RV64-NEXT: .LBB8_482: # %cond.store667
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 127(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bgez a3, .LBB8_524
+; RV64-NEXT: j .LBB8_239
+; RV64-NEXT: .LBB8_524: # %cond.store667
+; RV64-NEXT: j .LBB8_240
+; RV64-NEXT: .LBB8_483: # %cond.store673
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1921
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 2032(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bltz a3, .LBB8_484
+; RV64-NEXT: j .LBB8_242
+; RV64-NEXT: .LBB8_484: # %cond.store676
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1793
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1905(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bltz a3, .LBB8_485
+; RV64-NEXT: j .LBB8_243
+; RV64-NEXT: .LBB8_485: # %cond.store679
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1665
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1778(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bltz a3, .LBB8_486
+; RV64-NEXT: j .LBB8_244
+; RV64-NEXT: .LBB8_486: # %cond.store682
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1537
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1651(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bltz a3, .LBB8_487
+; RV64-NEXT: j .LBB8_245
+; RV64-NEXT: .LBB8_487: # %cond.store685
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1409
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1524(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bltz a3, .LBB8_488
+; RV64-NEXT: j .LBB8_246
+; RV64-NEXT: .LBB8_488: # %cond.store688
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1281
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1397(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bltz a3, .LBB8_489
+; RV64-NEXT: j .LBB8_247
+; RV64-NEXT: .LBB8_489: # %cond.store691
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1153
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1270(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bltz a3, .LBB8_490
+; RV64-NEXT: j .LBB8_248
+; RV64-NEXT: .LBB8_490: # %cond.store694
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1025
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1143(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bltz a3, .LBB8_491
+; RV64-NEXT: j .LBB8_249
+; RV64-NEXT: .LBB8_491: # %cond.store697
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 897
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 1016(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bltz a3, .LBB8_492
+; RV64-NEXT: j .LBB8_250
+; RV64-NEXT: .LBB8_492: # %cond.store700
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 769
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 889(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bltz a3, .LBB8_493
+; RV64-NEXT: j .LBB8_251
+; RV64-NEXT: .LBB8_493: # %cond.store703
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 641
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 762(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bltz a3, .LBB8_494
+; RV64-NEXT: j .LBB8_252
+; RV64-NEXT: .LBB8_494: # %cond.store706
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 513
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 635(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: bltz a3, .LBB8_495
+; RV64-NEXT: j .LBB8_253
+; RV64-NEXT: .LBB8_495: # %cond.store709
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 385
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 508(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bltz a3, .LBB8_496
+; RV64-NEXT: j .LBB8_254
+; RV64-NEXT: .LBB8_496: # %cond.store712
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 257
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 381(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bltz a3, .LBB8_497
+; RV64-NEXT: j .LBB8_255
+; RV64-NEXT: .LBB8_497: # %cond.store715
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 129
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 254(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bltz a3, .LBB8_498
+; RV64-NEXT: j .LBB8_256
+; RV64-NEXT: .LBB8_498: # %cond.store718
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a3, 127(a2)
+; RV64-NEXT: addi a4, a0, 1
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bltz a3, .LBB8_499
+; RV64-NEXT: j .LBB8_257
+; RV64-NEXT: .LBB8_499: # %cond.store721
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: addi a4, sp, 1920
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a4)
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 13
+; RV64-NEXT: bltz a2, .LBB8_500
+; RV64-NEXT: j .LBB8_258
+; RV64-NEXT: .LBB8_500: # %cond.store724
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1792
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1906(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 12
+; RV64-NEXT: bltz a2, .LBB8_501
+; RV64-NEXT: j .LBB8_259
+; RV64-NEXT: .LBB8_501: # %cond.store727
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1664
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1779(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 11
+; RV64-NEXT: bltz a2, .LBB8_502
+; RV64-NEXT: j .LBB8_260
+; RV64-NEXT: .LBB8_502: # %cond.store730
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1536
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1652(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 10
+; RV64-NEXT: bltz a2, .LBB8_503
+; RV64-NEXT: j .LBB8_261
+; RV64-NEXT: .LBB8_503: # %cond.store733
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1408
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1525(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 9
+; RV64-NEXT: bltz a2, .LBB8_504
+; RV64-NEXT: j .LBB8_262
+; RV64-NEXT: .LBB8_504: # %cond.store736
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1280
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1398(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: bltz a2, .LBB8_505
+; RV64-NEXT: j .LBB8_263
+; RV64-NEXT: .LBB8_505: # %cond.store739
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1152
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1271(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 7
+; RV64-NEXT: bltz a2, .LBB8_506
+; RV64-NEXT: j .LBB8_264
+; RV64-NEXT: .LBB8_506: # %cond.store742
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 1024
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1144(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: bltz a2, .LBB8_507
+; RV64-NEXT: j .LBB8_265
+; RV64-NEXT: .LBB8_507: # %cond.store745
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 896
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 1017(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: bltz a2, .LBB8_508
+; RV64-NEXT: j .LBB8_266
+; RV64-NEXT: .LBB8_508: # %cond.store748
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 768
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 890(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: bltz a2, .LBB8_509
+; RV64-NEXT: j .LBB8_267
+; RV64-NEXT: .LBB8_509: # %cond.store751
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 640
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 763(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: bltz a2, .LBB8_510
+; RV64-NEXT: j .LBB8_268
+; RV64-NEXT: .LBB8_510: # %cond.store754
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 512
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 636(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: bltz a2, .LBB8_511
+; RV64-NEXT: j .LBB8_269
+; RV64-NEXT: .LBB8_511: # %cond.store757
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 384
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 509(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: bltz a2, .LBB8_512
+; RV64-NEXT: j .LBB8_270
+; RV64-NEXT: .LBB8_512: # %cond.store760
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: addi a3, sp, 256
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v24, (a3)
+; RV64-NEXT: lbu a2, 382(sp)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: bgez a1, .LBB8_525
+; RV64-NEXT: j .LBB8_271
+; RV64-NEXT: .LBB8_525: # %cond.store760
+; RV64-NEXT: j .LBB8_272
+;
+; RV32-LABEL: test_compresstore_i8_v256:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vle8.v v24, (a1)
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: andi a1, a2, 1
+; RV32-NEXT: beqz a1, .LBB8_1
+; RV32-NEXT: j .LBB8_284
+; RV32-NEXT: .LBB8_1: # %else
+; RV32-NEXT: andi a1, a2, 2
+; RV32-NEXT: beqz a1, .LBB8_2
+; RV32-NEXT: j .LBB8_285
+; RV32-NEXT: .LBB8_2: # %else2
+; RV32-NEXT: andi a1, a2, 4
+; RV32-NEXT: beqz a1, .LBB8_3
+; RV32-NEXT: j .LBB8_286
+; RV32-NEXT: .LBB8_3: # %else5
+; RV32-NEXT: andi a1, a2, 8
+; RV32-NEXT: beqz a1, .LBB8_4
+; RV32-NEXT: j .LBB8_287
+; RV32-NEXT: .LBB8_4: # %else8
+; RV32-NEXT: andi a1, a2, 16
+; RV32-NEXT: beqz a1, .LBB8_5
+; RV32-NEXT: j .LBB8_288
+; RV32-NEXT: .LBB8_5: # %else11
+; RV32-NEXT: andi a1, a2, 32
+; RV32-NEXT: beqz a1, .LBB8_6
+; RV32-NEXT: j .LBB8_289
+; RV32-NEXT: .LBB8_6: # %else14
+; RV32-NEXT: andi a1, a2, 64
+; RV32-NEXT: beqz a1, .LBB8_7
+; RV32-NEXT: j .LBB8_290
+; RV32-NEXT: .LBB8_7: # %else17
+; RV32-NEXT: andi a1, a2, 128
+; RV32-NEXT: beqz a1, .LBB8_8
+; RV32-NEXT: j .LBB8_291
+; RV32-NEXT: .LBB8_8: # %else20
+; RV32-NEXT: andi a1, a2, 256
+; RV32-NEXT: beqz a1, .LBB8_9
+; RV32-NEXT: j .LBB8_292
+; RV32-NEXT: .LBB8_9: # %else23
+; RV32-NEXT: andi a1, a2, 512
+; RV32-NEXT: beqz a1, .LBB8_10
+; RV32-NEXT: j .LBB8_293
+; RV32-NEXT: .LBB8_10: # %else26
+; RV32-NEXT: andi a1, a2, 1024
+; RV32-NEXT: beqz a1, .LBB8_11
+; RV32-NEXT: j .LBB8_294
+; RV32-NEXT: .LBB8_11: # %else29
+; RV32-NEXT: slli a1, a2, 20
+; RV32-NEXT: bgez a1, .LBB8_12
+; RV32-NEXT: j .LBB8_295
+; RV32-NEXT: .LBB8_12: # %else32
+; RV32-NEXT: slli a1, a2, 19
+; RV32-NEXT: bgez a1, .LBB8_13
+; RV32-NEXT: j .LBB8_296
+; RV32-NEXT: .LBB8_13: # %else35
+; RV32-NEXT: slli a1, a2, 18
+; RV32-NEXT: bgez a1, .LBB8_14
+; RV32-NEXT: j .LBB8_297
+; RV32-NEXT: .LBB8_14: # %else38
+; RV32-NEXT: slli a1, a2, 17
+; RV32-NEXT: bgez a1, .LBB8_15
+; RV32-NEXT: j .LBB8_298
+; RV32-NEXT: .LBB8_15: # %else41
+; RV32-NEXT: slli a1, a2, 16
+; RV32-NEXT: bgez a1, .LBB8_16
+; RV32-NEXT: j .LBB8_299
+; RV32-NEXT: .LBB8_16: # %else44
+; RV32-NEXT: slli a1, a2, 15
+; RV32-NEXT: bgez a1, .LBB8_17
+; RV32-NEXT: j .LBB8_300
+; RV32-NEXT: .LBB8_17: # %else47
+; RV32-NEXT: slli a1, a2, 14
+; RV32-NEXT: bgez a1, .LBB8_18
+; RV32-NEXT: j .LBB8_301
+; RV32-NEXT: .LBB8_18: # %else50
+; RV32-NEXT: slli a1, a2, 13
+; RV32-NEXT: bgez a1, .LBB8_19
+; RV32-NEXT: j .LBB8_302
+; RV32-NEXT: .LBB8_19: # %else53
+; RV32-NEXT: slli a1, a2, 12
+; RV32-NEXT: bgez a1, .LBB8_20
+; RV32-NEXT: j .LBB8_303
+; RV32-NEXT: .LBB8_20: # %else56
+; RV32-NEXT: slli a1, a2, 11
+; RV32-NEXT: bgez a1, .LBB8_21
+; RV32-NEXT: j .LBB8_304
+; RV32-NEXT: .LBB8_21: # %else59
+; RV32-NEXT: slli a1, a2, 10
+; RV32-NEXT: bgez a1, .LBB8_22
+; RV32-NEXT: j .LBB8_305
+; RV32-NEXT: .LBB8_22: # %else62
+; RV32-NEXT: slli a1, a2, 9
+; RV32-NEXT: bgez a1, .LBB8_23
+; RV32-NEXT: j .LBB8_306
+; RV32-NEXT: .LBB8_23: # %else65
+; RV32-NEXT: slli a1, a2, 8
+; RV32-NEXT: bgez a1, .LBB8_24
+; RV32-NEXT: j .LBB8_307
+; RV32-NEXT: .LBB8_24: # %else68
+; RV32-NEXT: slli a1, a2, 7
+; RV32-NEXT: bgez a1, .LBB8_25
+; RV32-NEXT: j .LBB8_308
+; RV32-NEXT: .LBB8_25: # %else71
+; RV32-NEXT: slli a1, a2, 6
+; RV32-NEXT: bgez a1, .LBB8_26
+; RV32-NEXT: j .LBB8_309
+; RV32-NEXT: .LBB8_26: # %else74
+; RV32-NEXT: slli a1, a2, 5
+; RV32-NEXT: bgez a1, .LBB8_27
+; RV32-NEXT: j .LBB8_310
+; RV32-NEXT: .LBB8_27: # %else77
+; RV32-NEXT: slli a1, a2, 4
+; RV32-NEXT: bgez a1, .LBB8_28
+; RV32-NEXT: j .LBB8_311
+; RV32-NEXT: .LBB8_28: # %else80
+; RV32-NEXT: slli a1, a2, 3
+; RV32-NEXT: bgez a1, .LBB8_30
+; RV32-NEXT: .LBB8_29: # %cond.store82
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 28
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: .LBB8_30: # %else83
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: bgez a3, .LBB8_32
+; RV32-NEXT: # %bb.31: # %cond.store85
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 29
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: .LBB8_32: # %else86
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: vsrl.vx v9, v0, a1
+; RV32-NEXT: bgez a3, .LBB8_34
+; RV32-NEXT: # %bb.33: # %cond.store88
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 30
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: .LBB8_34: # %else89
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a3, v9
+; RV32-NEXT: bgez a2, .LBB8_36
+; RV32-NEXT: # %bb.35: # %cond.store91
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 31
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: .LBB8_36: # %else92
+; RV32-NEXT: addi sp, sp, -2032
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 2032
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: lui a2, 6
+; RV32-NEXT: addi a2, a2, -1776
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: andi a4, a3, 1
+; RV32-NEXT: lui a2, 6
+; RV32-NEXT: addi a2, a2, -984
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: beqz a4, .LBB8_37
+; RV32-NEXT: j .LBB8_312
+; RV32-NEXT: .LBB8_37: # %else95
+; RV32-NEXT: andi a4, a3, 2
+; RV32-NEXT: beqz a4, .LBB8_38
+; RV32-NEXT: j .LBB8_313
+; RV32-NEXT: .LBB8_38: # %else98
+; RV32-NEXT: andi a4, a3, 4
+; RV32-NEXT: beqz a4, .LBB8_39
+; RV32-NEXT: j .LBB8_314
+; RV32-NEXT: .LBB8_39: # %else101
+; RV32-NEXT: andi a4, a3, 8
+; RV32-NEXT: beqz a4, .LBB8_40
+; RV32-NEXT: j .LBB8_315
+; RV32-NEXT: .LBB8_40: # %else104
+; RV32-NEXT: andi a4, a3, 16
+; RV32-NEXT: beqz a4, .LBB8_41
+; RV32-NEXT: j .LBB8_316
+; RV32-NEXT: .LBB8_41: # %else107
+; RV32-NEXT: andi a4, a3, 32
+; RV32-NEXT: beqz a4, .LBB8_42
+; RV32-NEXT: j .LBB8_317
+; RV32-NEXT: .LBB8_42: # %else110
+; RV32-NEXT: andi a4, a3, 64
+; RV32-NEXT: beqz a4, .LBB8_43
+; RV32-NEXT: j .LBB8_318
+; RV32-NEXT: .LBB8_43: # %else113
+; RV32-NEXT: andi a4, a3, 128
+; RV32-NEXT: beqz a4, .LBB8_44
+; RV32-NEXT: j .LBB8_319
+; RV32-NEXT: .LBB8_44: # %else116
+; RV32-NEXT: andi a4, a3, 256
+; RV32-NEXT: beqz a4, .LBB8_46
+; RV32-NEXT: .LBB8_45: # %cond.store118
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 23
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: .LBB8_46: # %else119
+; RV32-NEXT: andi a4, a3, 512
+; RV32-NEXT: lui a2, 5
+; RV32-NEXT: addi a2, a2, 953
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: beqz a4, .LBB8_47
+; RV32-NEXT: j .LBB8_320
+; RV32-NEXT: .LBB8_47: # %else122
+; RV32-NEXT: andi a4, a3, 1024
+; RV32-NEXT: beqz a4, .LBB8_48
+; RV32-NEXT: j .LBB8_321
+; RV32-NEXT: .LBB8_48: # %else125
+; RV32-NEXT: slli a4, a3, 20
+; RV32-NEXT: bgez a4, .LBB8_49
+; RV32-NEXT: j .LBB8_322
+; RV32-NEXT: .LBB8_49: # %else128
+; RV32-NEXT: slli a4, a3, 19
+; RV32-NEXT: bgez a4, .LBB8_50
+; RV32-NEXT: j .LBB8_323
+; RV32-NEXT: .LBB8_50: # %else131
+; RV32-NEXT: slli a4, a3, 18
+; RV32-NEXT: bgez a4, .LBB8_51
+; RV32-NEXT: j .LBB8_324
+; RV32-NEXT: .LBB8_51: # %else134
+; RV32-NEXT: slli a4, a3, 17
+; RV32-NEXT: bgez a4, .LBB8_52
+; RV32-NEXT: j .LBB8_325
+; RV32-NEXT: .LBB8_52: # %else137
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: bgez a4, .LBB8_53
+; RV32-NEXT: j .LBB8_326
+; RV32-NEXT: .LBB8_53: # %else140
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: bgez a4, .LBB8_54
+; RV32-NEXT: j .LBB8_327
+; RV32-NEXT: .LBB8_54: # %else143
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bgez a4, .LBB8_55
+; RV32-NEXT: j .LBB8_328
+; RV32-NEXT: .LBB8_55: # %else146
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bgez a4, .LBB8_56
+; RV32-NEXT: j .LBB8_329
+; RV32-NEXT: .LBB8_56: # %else149
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bgez a4, .LBB8_57
+; RV32-NEXT: j .LBB8_330
+; RV32-NEXT: .LBB8_57: # %else152
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bgez a4, .LBB8_58
+; RV32-NEXT: j .LBB8_331
+; RV32-NEXT: .LBB8_58: # %else155
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bgez a4, .LBB8_59
+; RV32-NEXT: j .LBB8_332
+; RV32-NEXT: .LBB8_59: # %else158
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bgez a4, .LBB8_60
+; RV32-NEXT: j .LBB8_333
+; RV32-NEXT: .LBB8_60: # %else161
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bgez a4, .LBB8_61
+; RV32-NEXT: j .LBB8_334
+; RV32-NEXT: .LBB8_61: # %else164
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bgez a4, .LBB8_62
+; RV32-NEXT: j .LBB8_335
+; RV32-NEXT: .LBB8_62: # %else167
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bgez a4, .LBB8_64
+; RV32-NEXT: .LBB8_63: # %cond.store169
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_64: # %else170
+; RV32-NEXT: slli a2, a3, 5
+; RV32-NEXT: lui a4, 5
+; RV32-NEXT: addi a4, a4, -1206
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: bgez a2, .LBB8_65
+; RV32-NEXT: j .LBB8_336
+; RV32-NEXT: .LBB8_65: # %else173
+; RV32-NEXT: slli a2, a3, 4
+; RV32-NEXT: bgez a2, .LBB8_66
+; RV32-NEXT: j .LBB8_337
+; RV32-NEXT: .LBB8_66: # %else176
+; RV32-NEXT: slli a2, a3, 3
+; RV32-NEXT: bgez a2, .LBB8_67
+; RV32-NEXT: j .LBB8_338
+; RV32-NEXT: .LBB8_67: # %else179
+; RV32-NEXT: slli a2, a3, 2
+; RV32-NEXT: bgez a2, .LBB8_69
+; RV32-NEXT: .LBB8_68: # %cond.store181
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1651(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_69: # %else182
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a2, a3, 1
+; RV32-NEXT: vslidedown.vi v9, v0, 1
+; RV32-NEXT: bgez a2, .LBB8_71
+; RV32-NEXT: # %bb.70: # %cond.store184
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1524(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_71: # %else185
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v9
+; RV32-NEXT: bgez a3, .LBB8_72
+; RV32-NEXT: j .LBB8_339
+; RV32-NEXT: .LBB8_72: # %else188
+; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: beqz a3, .LBB8_73
+; RV32-NEXT: j .LBB8_340
+; RV32-NEXT: .LBB8_73: # %else191
+; RV32-NEXT: andi a3, a2, 2
+; RV32-NEXT: beqz a3, .LBB8_74
+; RV32-NEXT: j .LBB8_341
+; RV32-NEXT: .LBB8_74: # %else194
+; RV32-NEXT: andi a3, a2, 4
+; RV32-NEXT: beqz a3, .LBB8_75
+; RV32-NEXT: j .LBB8_342
+; RV32-NEXT: .LBB8_75: # %else197
+; RV32-NEXT: andi a3, a2, 8
+; RV32-NEXT: beqz a3, .LBB8_76
+; RV32-NEXT: j .LBB8_343
+; RV32-NEXT: .LBB8_76: # %else200
+; RV32-NEXT: andi a3, a2, 16
+; RV32-NEXT: beqz a3, .LBB8_77
+; RV32-NEXT: j .LBB8_344
+; RV32-NEXT: .LBB8_77: # %else203
+; RV32-NEXT: andi a3, a2, 32
+; RV32-NEXT: beqz a3, .LBB8_78
+; RV32-NEXT: j .LBB8_345
+; RV32-NEXT: .LBB8_78: # %else206
+; RV32-NEXT: andi a3, a2, 64
+; RV32-NEXT: beqz a3, .LBB8_79
+; RV32-NEXT: j .LBB8_346
+; RV32-NEXT: .LBB8_79: # %else209
+; RV32-NEXT: andi a3, a2, 128
+; RV32-NEXT: beqz a3, .LBB8_80
+; RV32-NEXT: j .LBB8_347
+; RV32-NEXT: .LBB8_80: # %else212
+; RV32-NEXT: andi a3, a2, 256
+; RV32-NEXT: beqz a3, .LBB8_81
+; RV32-NEXT: j .LBB8_348
+; RV32-NEXT: .LBB8_81: # %else215
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: beqz a3, .LBB8_82
+; RV32-NEXT: j .LBB8_349
+; RV32-NEXT: .LBB8_82: # %else218
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: beqz a3, .LBB8_84
+; RV32-NEXT: .LBB8_83: # %cond.store220
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 0(a4)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_84: # %else221
+; RV32-NEXT: slli a4, a2, 20
+; RV32-NEXT: lui a3, 4
+; RV32-NEXT: addi a3, a3, 731
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: bgez a4, .LBB8_85
+; RV32-NEXT: j .LBB8_350
+; RV32-NEXT: .LBB8_85: # %else224
+; RV32-NEXT: slli a4, a2, 19
+; RV32-NEXT: bgez a4, .LBB8_86
+; RV32-NEXT: j .LBB8_351
+; RV32-NEXT: .LBB8_86: # %else227
+; RV32-NEXT: slli a4, a2, 18
+; RV32-NEXT: bgez a4, .LBB8_87
+; RV32-NEXT: j .LBB8_352
+; RV32-NEXT: .LBB8_87: # %else230
+; RV32-NEXT: slli a4, a2, 17
+; RV32-NEXT: bgez a4, .LBB8_88
+; RV32-NEXT: j .LBB8_353
+; RV32-NEXT: .LBB8_88: # %else233
+; RV32-NEXT: slli a4, a2, 16
+; RV32-NEXT: bgez a4, .LBB8_89
+; RV32-NEXT: j .LBB8_354
+; RV32-NEXT: .LBB8_89: # %else236
+; RV32-NEXT: slli a4, a2, 15
+; RV32-NEXT: bgez a4, .LBB8_90
+; RV32-NEXT: j .LBB8_355
+; RV32-NEXT: .LBB8_90: # %else239
+; RV32-NEXT: slli a4, a2, 14
+; RV32-NEXT: bgez a4, .LBB8_91
+; RV32-NEXT: j .LBB8_356
+; RV32-NEXT: .LBB8_91: # %else242
+; RV32-NEXT: slli a4, a2, 13
+; RV32-NEXT: bgez a4, .LBB8_92
+; RV32-NEXT: j .LBB8_357
+; RV32-NEXT: .LBB8_92: # %else245
+; RV32-NEXT: slli a4, a2, 12
+; RV32-NEXT: bgez a4, .LBB8_93
+; RV32-NEXT: j .LBB8_358
+; RV32-NEXT: .LBB8_93: # %else248
+; RV32-NEXT: slli a4, a2, 11
+; RV32-NEXT: bgez a4, .LBB8_94
+; RV32-NEXT: j .LBB8_359
+; RV32-NEXT: .LBB8_94: # %else251
+; RV32-NEXT: slli a4, a2, 10
+; RV32-NEXT: bgez a4, .LBB8_95
+; RV32-NEXT: j .LBB8_360
+; RV32-NEXT: .LBB8_95: # %else254
+; RV32-NEXT: slli a4, a2, 9
+; RV32-NEXT: bgez a4, .LBB8_96
+; RV32-NEXT: j .LBB8_361
+; RV32-NEXT: .LBB8_96: # %else257
+; RV32-NEXT: slli a4, a2, 8
+; RV32-NEXT: bgez a4, .LBB8_97
+; RV32-NEXT: j .LBB8_362
+; RV32-NEXT: .LBB8_97: # %else260
+; RV32-NEXT: slli a4, a2, 7
+; RV32-NEXT: bgez a4, .LBB8_98
+; RV32-NEXT: j .LBB8_363
+; RV32-NEXT: .LBB8_98: # %else263
+; RV32-NEXT: slli a4, a2, 6
+; RV32-NEXT: bgez a4, .LBB8_99
+; RV32-NEXT: j .LBB8_364
+; RV32-NEXT: .LBB8_99: # %else266
+; RV32-NEXT: slli a4, a2, 5
+; RV32-NEXT: bgez a4, .LBB8_100
+; RV32-NEXT: j .LBB8_365
+; RV32-NEXT: .LBB8_100: # %else269
+; RV32-NEXT: slli a4, a2, 4
+; RV32-NEXT: bgez a4, .LBB8_102
+; RV32-NEXT: .LBB8_101: # %cond.store271
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 0(a3)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_102: # %else272
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: lui a4, 4
+; RV32-NEXT: addi a4, a4, -1428
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: bgez a3, .LBB8_104
+; RV32-NEXT: # %bb.103: # %cond.store274
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 512
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 2032(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_104: # %else275
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: bgez a3, .LBB8_106
+; RV32-NEXT: # %bb.105: # %cond.store277
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1905(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_106: # %else278
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: vsrl.vx v9, v9, a1
+; RV32-NEXT: bgez a3, .LBB8_108
+; RV32-NEXT: # %bb.107: # %cond.store280
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1778(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_108: # %else281
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a3, v9
+; RV32-NEXT: bgez a2, .LBB8_109
+; RV32-NEXT: j .LBB8_366
+; RV32-NEXT: .LBB8_109: # %else284
+; RV32-NEXT: andi a2, a3, 1
+; RV32-NEXT: beqz a2, .LBB8_110
+; RV32-NEXT: j .LBB8_367
+; RV32-NEXT: .LBB8_110: # %else287
+; RV32-NEXT: andi a2, a3, 2
+; RV32-NEXT: beqz a2, .LBB8_111
+; RV32-NEXT: j .LBB8_368
+; RV32-NEXT: .LBB8_111: # %else290
+; RV32-NEXT: andi a2, a3, 4
+; RV32-NEXT: beqz a2, .LBB8_112
+; RV32-NEXT: j .LBB8_369
+; RV32-NEXT: .LBB8_112: # %else293
+; RV32-NEXT: andi a2, a3, 8
+; RV32-NEXT: beqz a2, .LBB8_113
+; RV32-NEXT: j .LBB8_370
+; RV32-NEXT: .LBB8_113: # %else296
+; RV32-NEXT: andi a2, a3, 16
+; RV32-NEXT: beqz a2, .LBB8_114
+; RV32-NEXT: j .LBB8_371
+; RV32-NEXT: .LBB8_114: # %else299
+; RV32-NEXT: andi a2, a3, 32
+; RV32-NEXT: beqz a2, .LBB8_115
+; RV32-NEXT: j .LBB8_372
+; RV32-NEXT: .LBB8_115: # %else302
+; RV32-NEXT: andi a2, a3, 64
+; RV32-NEXT: beqz a2, .LBB8_116
+; RV32-NEXT: j .LBB8_373
+; RV32-NEXT: .LBB8_116: # %else305
+; RV32-NEXT: andi a2, a3, 128
+; RV32-NEXT: beqz a2, .LBB8_117
+; RV32-NEXT: j .LBB8_374
+; RV32-NEXT: .LBB8_117: # %else308
+; RV32-NEXT: andi a2, a3, 256
+; RV32-NEXT: beqz a2, .LBB8_118
+; RV32-NEXT: j .LBB8_375
+; RV32-NEXT: .LBB8_118: # %else311
+; RV32-NEXT: andi a2, a3, 512
+; RV32-NEXT: beqz a2, .LBB8_119
+; RV32-NEXT: j .LBB8_376
+; RV32-NEXT: .LBB8_119: # %else314
+; RV32-NEXT: andi a2, a3, 1024
+; RV32-NEXT: beqz a2, .LBB8_120
+; RV32-NEXT: j .LBB8_377
+; RV32-NEXT: .LBB8_120: # %else317
+; RV32-NEXT: slli a2, a3, 20
+; RV32-NEXT: bgez a2, .LBB8_121
+; RV32-NEXT: j .LBB8_378
+; RV32-NEXT: .LBB8_121: # %else320
+; RV32-NEXT: slli a2, a3, 19
+; RV32-NEXT: bgez a2, .LBB8_123
+; RV32-NEXT: .LBB8_122: # %cond.store322
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: li a5, 29
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 0(a4)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_123: # %else323
+; RV32-NEXT: slli a4, a3, 18
+; RV32-NEXT: lui a2, 3
+; RV32-NEXT: addi a2, a2, 509
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: bgez a4, .LBB8_124
+; RV32-NEXT: j .LBB8_379
+; RV32-NEXT: .LBB8_124: # %else326
+; RV32-NEXT: slli a4, a3, 17
+; RV32-NEXT: bgez a4, .LBB8_125
+; RV32-NEXT: j .LBB8_380
+; RV32-NEXT: .LBB8_125: # %else329
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: bgez a4, .LBB8_126
+; RV32-NEXT: j .LBB8_381
+; RV32-NEXT: .LBB8_126: # %else332
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: bgez a4, .LBB8_127
+; RV32-NEXT: j .LBB8_382
+; RV32-NEXT: .LBB8_127: # %else335
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bgez a4, .LBB8_128
+; RV32-NEXT: j .LBB8_383
+; RV32-NEXT: .LBB8_128: # %else338
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bgez a4, .LBB8_129
+; RV32-NEXT: j .LBB8_384
+; RV32-NEXT: .LBB8_129: # %else341
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bgez a4, .LBB8_130
+; RV32-NEXT: j .LBB8_385
+; RV32-NEXT: .LBB8_130: # %else344
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bgez a4, .LBB8_131
+; RV32-NEXT: j .LBB8_386
+; RV32-NEXT: .LBB8_131: # %else347
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bgez a4, .LBB8_132
+; RV32-NEXT: j .LBB8_387
+; RV32-NEXT: .LBB8_132: # %else350
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bgez a4, .LBB8_133
+; RV32-NEXT: j .LBB8_388
+; RV32-NEXT: .LBB8_133: # %else353
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bgez a4, .LBB8_134
+; RV32-NEXT: j .LBB8_389
+; RV32-NEXT: .LBB8_134: # %else356
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bgez a4, .LBB8_135
+; RV32-NEXT: j .LBB8_390
+; RV32-NEXT: .LBB8_135: # %else359
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bgez a4, .LBB8_136
+; RV32-NEXT: j .LBB8_391
+; RV32-NEXT: .LBB8_136: # %else362
+; RV32-NEXT: slli a4, a3, 5
+; RV32-NEXT: bgez a4, .LBB8_137
+; RV32-NEXT: j .LBB8_392
+; RV32-NEXT: .LBB8_137: # %else365
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: bgez a4, .LBB8_138
+; RV32-NEXT: j .LBB8_393
+; RV32-NEXT: .LBB8_138: # %else368
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: bgez a4, .LBB8_139
+; RV32-NEXT: j .LBB8_394
+; RV32-NEXT: .LBB8_139: # %else371
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: bgez a4, .LBB8_141
+; RV32-NEXT: .LBB8_140: # %cond.store373
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_141: # %else374
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: lui a2, 3
+; RV32-NEXT: addi a2, a2, -1619
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: bgez a4, .LBB8_143
+; RV32-NEXT: # %bb.142: # %cond.store376
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 2001(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_143: # %else377
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a4, v8
+; RV32-NEXT: bgez a3, .LBB8_144
+; RV32-NEXT: j .LBB8_395
+; RV32-NEXT: .LBB8_144: # %else380
+; RV32-NEXT: andi a3, a4, 1
+; RV32-NEXT: beqz a3, .LBB8_145
+; RV32-NEXT: j .LBB8_396
+; RV32-NEXT: .LBB8_145: # %else383
+; RV32-NEXT: andi a3, a4, 2
+; RV32-NEXT: beqz a3, .LBB8_146
+; RV32-NEXT: j .LBB8_397
+; RV32-NEXT: .LBB8_146: # %else386
+; RV32-NEXT: andi a3, a4, 4
+; RV32-NEXT: beqz a3, .LBB8_147
+; RV32-NEXT: j .LBB8_398
+; RV32-NEXT: .LBB8_147: # %else389
+; RV32-NEXT: andi a3, a4, 8
+; RV32-NEXT: beqz a3, .LBB8_148
+; RV32-NEXT: j .LBB8_399
+; RV32-NEXT: .LBB8_148: # %else392
+; RV32-NEXT: andi a3, a4, 16
+; RV32-NEXT: beqz a3, .LBB8_149
+; RV32-NEXT: j .LBB8_400
+; RV32-NEXT: .LBB8_149: # %else395
+; RV32-NEXT: andi a3, a4, 32
+; RV32-NEXT: beqz a3, .LBB8_150
+; RV32-NEXT: j .LBB8_401
+; RV32-NEXT: .LBB8_150: # %else398
+; RV32-NEXT: andi a3, a4, 64
+; RV32-NEXT: beqz a3, .LBB8_151
+; RV32-NEXT: j .LBB8_402
+; RV32-NEXT: .LBB8_151: # %else401
+; RV32-NEXT: andi a3, a4, 128
+; RV32-NEXT: beqz a3, .LBB8_152
+; RV32-NEXT: j .LBB8_403
+; RV32-NEXT: .LBB8_152: # %else404
+; RV32-NEXT: andi a3, a4, 256
+; RV32-NEXT: beqz a3, .LBB8_153
+; RV32-NEXT: j .LBB8_404
+; RV32-NEXT: .LBB8_153: # %else407
+; RV32-NEXT: andi a3, a4, 512
+; RV32-NEXT: beqz a3, .LBB8_154
+; RV32-NEXT: j .LBB8_405
+; RV32-NEXT: .LBB8_154: # %else410
+; RV32-NEXT: andi a3, a4, 1024
+; RV32-NEXT: beqz a3, .LBB8_155
+; RV32-NEXT: j .LBB8_406
+; RV32-NEXT: .LBB8_155: # %else413
+; RV32-NEXT: slli a3, a4, 20
+; RV32-NEXT: bgez a3, .LBB8_156
+; RV32-NEXT: j .LBB8_407
+; RV32-NEXT: .LBB8_156: # %else416
+; RV32-NEXT: slli a3, a4, 19
+; RV32-NEXT: bgez a3, .LBB8_157
+; RV32-NEXT: j .LBB8_408
+; RV32-NEXT: .LBB8_157: # %else419
+; RV32-NEXT: slli a3, a4, 18
+; RV32-NEXT: bgez a3, .LBB8_158
+; RV32-NEXT: j .LBB8_409
+; RV32-NEXT: .LBB8_158: # %else422
+; RV32-NEXT: slli a3, a4, 17
+; RV32-NEXT: bgez a3, .LBB8_159
+; RV32-NEXT: j .LBB8_410
+; RV32-NEXT: .LBB8_159: # %else425
+; RV32-NEXT: slli a3, a4, 16
+; RV32-NEXT: bgez a3, .LBB8_160
+; RV32-NEXT: j .LBB8_411
+; RV32-NEXT: .LBB8_160: # %else428
+; RV32-NEXT: slli a3, a4, 15
+; RV32-NEXT: bgez a3, .LBB8_161
+; RV32-NEXT: j .LBB8_412
+; RV32-NEXT: .LBB8_161: # %else431
+; RV32-NEXT: slli a3, a4, 14
+; RV32-NEXT: bgez a3, .LBB8_162
+; RV32-NEXT: j .LBB8_413
+; RV32-NEXT: .LBB8_162: # %else434
+; RV32-NEXT: slli a3, a4, 13
+; RV32-NEXT: bgez a3, .LBB8_163
+; RV32-NEXT: j .LBB8_414
+; RV32-NEXT: .LBB8_163: # %else437
+; RV32-NEXT: slli a3, a4, 12
+; RV32-NEXT: bgez a3, .LBB8_164
+; RV32-NEXT: j .LBB8_415
+; RV32-NEXT: .LBB8_164: # %else440
+; RV32-NEXT: slli a3, a4, 11
+; RV32-NEXT: bgez a3, .LBB8_165
+; RV32-NEXT: j .LBB8_416
+; RV32-NEXT: .LBB8_165: # %else443
+; RV32-NEXT: slli a3, a4, 10
+; RV32-NEXT: bgez a3, .LBB8_166
+; RV32-NEXT: j .LBB8_417
+; RV32-NEXT: .LBB8_166: # %else446
+; RV32-NEXT: slli a3, a4, 9
+; RV32-NEXT: bgez a3, .LBB8_167
+; RV32-NEXT: j .LBB8_418
+; RV32-NEXT: .LBB8_167: # %else449
+; RV32-NEXT: slli a3, a4, 8
+; RV32-NEXT: bgez a3, .LBB8_168
+; RV32-NEXT: j .LBB8_419
+; RV32-NEXT: .LBB8_168: # %else452
+; RV32-NEXT: slli a3, a4, 7
+; RV32-NEXT: bgez a3, .LBB8_169
+; RV32-NEXT: j .LBB8_420
+; RV32-NEXT: .LBB8_169: # %else455
+; RV32-NEXT: slli a3, a4, 6
+; RV32-NEXT: bgez a3, .LBB8_170
+; RV32-NEXT: j .LBB8_421
+; RV32-NEXT: .LBB8_170: # %else458
+; RV32-NEXT: slli a3, a4, 5
+; RV32-NEXT: bgez a3, .LBB8_171
+; RV32-NEXT: j .LBB8_422
+; RV32-NEXT: .LBB8_171: # %else461
+; RV32-NEXT: slli a3, a4, 4
+; RV32-NEXT: bgez a3, .LBB8_172
+; RV32-NEXT: j .LBB8_423
+; RV32-NEXT: .LBB8_172: # %else464
+; RV32-NEXT: slli a3, a4, 3
+; RV32-NEXT: bgez a3, .LBB8_173
+; RV32-NEXT: j .LBB8_424
+; RV32-NEXT: .LBB8_173: # %else467
+; RV32-NEXT: slli a3, a4, 2
+; RV32-NEXT: bgez a3, .LBB8_175
+; RV32-NEXT: .LBB8_174: # %cond.store469
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 29
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: .LBB8_175: # %else470
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a3, a4, 1
+; RV32-NEXT: vsrl.vx v9, v8, a1
+; RV32-NEXT: bgez a3, .LBB8_177
+; RV32-NEXT: # %bb.176: # %cond.store472
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 30
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: .LBB8_177: # %else473
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a3, v9
+; RV32-NEXT: bgez a4, .LBB8_178
+; RV32-NEXT: j .LBB8_425
+; RV32-NEXT: .LBB8_178: # %else476
+; RV32-NEXT: andi a4, a3, 1
+; RV32-NEXT: beqz a4, .LBB8_179
+; RV32-NEXT: j .LBB8_426
+; RV32-NEXT: .LBB8_179: # %else479
+; RV32-NEXT: andi a4, a3, 2
+; RV32-NEXT: beqz a4, .LBB8_180
+; RV32-NEXT: j .LBB8_427
+; RV32-NEXT: .LBB8_180: # %else482
+; RV32-NEXT: andi a4, a3, 4
+; RV32-NEXT: beqz a4, .LBB8_181
+; RV32-NEXT: j .LBB8_428
+; RV32-NEXT: .LBB8_181: # %else485
+; RV32-NEXT: andi a4, a3, 8
+; RV32-NEXT: beqz a4, .LBB8_182
+; RV32-NEXT: j .LBB8_429
+; RV32-NEXT: .LBB8_182: # %else488
+; RV32-NEXT: andi a4, a3, 16
+; RV32-NEXT: beqz a4, .LBB8_183
+; RV32-NEXT: j .LBB8_430
+; RV32-NEXT: .LBB8_183: # %else491
+; RV32-NEXT: andi a4, a3, 32
+; RV32-NEXT: beqz a4, .LBB8_184
+; RV32-NEXT: j .LBB8_431
+; RV32-NEXT: .LBB8_184: # %else494
+; RV32-NEXT: andi a4, a3, 64
+; RV32-NEXT: beqz a4, .LBB8_185
+; RV32-NEXT: j .LBB8_432
+; RV32-NEXT: .LBB8_185: # %else497
+; RV32-NEXT: andi a4, a3, 128
+; RV32-NEXT: beqz a4, .LBB8_186
+; RV32-NEXT: j .LBB8_433
+; RV32-NEXT: .LBB8_186: # %else500
+; RV32-NEXT: andi a4, a3, 256
+; RV32-NEXT: beqz a4, .LBB8_187
+; RV32-NEXT: j .LBB8_434
+; RV32-NEXT: .LBB8_187: # %else503
+; RV32-NEXT: andi a4, a3, 512
+; RV32-NEXT: beqz a4, .LBB8_188
+; RV32-NEXT: j .LBB8_435
+; RV32-NEXT: .LBB8_188: # %else506
+; RV32-NEXT: andi a4, a3, 1024
+; RV32-NEXT: beqz a4, .LBB8_189
+; RV32-NEXT: j .LBB8_436
+; RV32-NEXT: .LBB8_189: # %else509
+; RV32-NEXT: slli a4, a3, 20
+; RV32-NEXT: bgez a4, .LBB8_190
+; RV32-NEXT: j .LBB8_437
+; RV32-NEXT: .LBB8_190: # %else512
+; RV32-NEXT: slli a4, a3, 19
+; RV32-NEXT: bgez a4, .LBB8_191
+; RV32-NEXT: j .LBB8_438
+; RV32-NEXT: .LBB8_191: # %else515
+; RV32-NEXT: slli a4, a3, 18
+; RV32-NEXT: bgez a4, .LBB8_193
+; RV32-NEXT: .LBB8_192: # %cond.store517
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_193: # %else518
+; RV32-NEXT: slli a4, a3, 17
+; RV32-NEXT: lui a2, 2
+; RV32-NEXT: addi a2, a2, 318
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: bgez a4, .LBB8_194
+; RV32-NEXT: j .LBB8_439
+; RV32-NEXT: .LBB8_194: # %else521
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: bgez a4, .LBB8_195
+; RV32-NEXT: j .LBB8_440
+; RV32-NEXT: .LBB8_195: # %else524
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: bgez a4, .LBB8_196
+; RV32-NEXT: j .LBB8_441
+; RV32-NEXT: .LBB8_196: # %else527
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bgez a4, .LBB8_197
+; RV32-NEXT: j .LBB8_442
+; RV32-NEXT: .LBB8_197: # %else530
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bgez a4, .LBB8_198
+; RV32-NEXT: j .LBB8_443
+; RV32-NEXT: .LBB8_198: # %else533
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bgez a4, .LBB8_199
+; RV32-NEXT: j .LBB8_444
+; RV32-NEXT: .LBB8_199: # %else536
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bgez a4, .LBB8_200
+; RV32-NEXT: j .LBB8_445
+; RV32-NEXT: .LBB8_200: # %else539
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bgez a4, .LBB8_201
+; RV32-NEXT: j .LBB8_446
+; RV32-NEXT: .LBB8_201: # %else542
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bgez a4, .LBB8_202
+; RV32-NEXT: j .LBB8_447
+; RV32-NEXT: .LBB8_202: # %else545
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bgez a4, .LBB8_203
+; RV32-NEXT: j .LBB8_448
+; RV32-NEXT: .LBB8_203: # %else548
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bgez a4, .LBB8_204
+; RV32-NEXT: j .LBB8_449
+; RV32-NEXT: .LBB8_204: # %else551
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bgez a4, .LBB8_205
+; RV32-NEXT: j .LBB8_450
+; RV32-NEXT: .LBB8_205: # %else554
+; RV32-NEXT: slli a4, a3, 5
+; RV32-NEXT: bgez a4, .LBB8_206
+; RV32-NEXT: j .LBB8_451
+; RV32-NEXT: .LBB8_206: # %else557
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: bgez a4, .LBB8_207
+; RV32-NEXT: j .LBB8_452
+; RV32-NEXT: .LBB8_207: # %else560
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: bgez a4, .LBB8_208
+; RV32-NEXT: j .LBB8_453
+; RV32-NEXT: .LBB8_208: # %else563
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: bgez a4, .LBB8_210
+; RV32-NEXT: .LBB8_209: # %cond.store565
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 127(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_210: # %else566
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: bgez a4, .LBB8_212
+; RV32-NEXT: # %bb.211: # %cond.store568
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_212: # %else569
+; RV32-NEXT: lui a2, 2
+; RV32-NEXT: addi a2, a2, -1841
+; RV32-NEXT: add a4, sp, a2
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: bgez a3, .LBB8_213
+; RV32-NEXT: j .LBB8_454
+; RV32-NEXT: .LBB8_213: # %else572
+; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: beqz a3, .LBB8_214
+; RV32-NEXT: j .LBB8_455
+; RV32-NEXT: .LBB8_214: # %else575
+; RV32-NEXT: andi a3, a2, 2
+; RV32-NEXT: beqz a3, .LBB8_215
+; RV32-NEXT: j .LBB8_456
+; RV32-NEXT: .LBB8_215: # %else578
+; RV32-NEXT: andi a3, a2, 4
+; RV32-NEXT: beqz a3, .LBB8_216
+; RV32-NEXT: j .LBB8_457
+; RV32-NEXT: .LBB8_216: # %else581
+; RV32-NEXT: andi a3, a2, 8
+; RV32-NEXT: beqz a3, .LBB8_217
+; RV32-NEXT: j .LBB8_458
+; RV32-NEXT: .LBB8_217: # %else584
+; RV32-NEXT: andi a3, a2, 16
+; RV32-NEXT: beqz a3, .LBB8_218
+; RV32-NEXT: j .LBB8_459
+; RV32-NEXT: .LBB8_218: # %else587
+; RV32-NEXT: andi a3, a2, 32
+; RV32-NEXT: beqz a3, .LBB8_219
+; RV32-NEXT: j .LBB8_460
+; RV32-NEXT: .LBB8_219: # %else590
+; RV32-NEXT: andi a3, a2, 64
+; RV32-NEXT: beqz a3, .LBB8_220
+; RV32-NEXT: j .LBB8_461
+; RV32-NEXT: .LBB8_220: # %else593
+; RV32-NEXT: andi a3, a2, 128
+; RV32-NEXT: beqz a3, .LBB8_221
+; RV32-NEXT: j .LBB8_462
+; RV32-NEXT: .LBB8_221: # %else596
+; RV32-NEXT: andi a3, a2, 256
+; RV32-NEXT: beqz a3, .LBB8_222
+; RV32-NEXT: j .LBB8_463
+; RV32-NEXT: .LBB8_222: # %else599
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: beqz a3, .LBB8_223
+; RV32-NEXT: j .LBB8_464
+; RV32-NEXT: .LBB8_223: # %else602
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: beqz a3, .LBB8_224
+; RV32-NEXT: j .LBB8_465
+; RV32-NEXT: .LBB8_224: # %else605
+; RV32-NEXT: slli a3, a2, 20
+; RV32-NEXT: bgez a3, .LBB8_225
+; RV32-NEXT: j .LBB8_466
+; RV32-NEXT: .LBB8_225: # %else608
+; RV32-NEXT: slli a3, a2, 19
+; RV32-NEXT: bgez a3, .LBB8_226
+; RV32-NEXT: j .LBB8_467
+; RV32-NEXT: .LBB8_226: # %else611
+; RV32-NEXT: slli a3, a2, 18
+; RV32-NEXT: bgez a3, .LBB8_227
+; RV32-NEXT: j .LBB8_468
+; RV32-NEXT: .LBB8_227: # %else614
+; RV32-NEXT: slli a3, a2, 17
+; RV32-NEXT: bgez a3, .LBB8_228
+; RV32-NEXT: j .LBB8_469
+; RV32-NEXT: .LBB8_228: # %else617
+; RV32-NEXT: slli a3, a2, 16
+; RV32-NEXT: bgez a3, .LBB8_230
+; RV32-NEXT: .LBB8_229: # %cond.store619
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 0(a4)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_230: # %else620
+; RV32-NEXT: slli a4, a2, 15
+; RV32-NEXT: lui a3, 1
+; RV32-NEXT: addi a3, a3, 96
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: bgez a4, .LBB8_231
+; RV32-NEXT: j .LBB8_470
+; RV32-NEXT: .LBB8_231: # %else623
+; RV32-NEXT: slli a4, a2, 14
+; RV32-NEXT: bgez a4, .LBB8_232
+; RV32-NEXT: j .LBB8_471
+; RV32-NEXT: .LBB8_232: # %else626
+; RV32-NEXT: slli a4, a2, 13
+; RV32-NEXT: bgez a4, .LBB8_233
+; RV32-NEXT: j .LBB8_472
+; RV32-NEXT: .LBB8_233: # %else629
+; RV32-NEXT: slli a4, a2, 12
+; RV32-NEXT: bgez a4, .LBB8_234
+; RV32-NEXT: j .LBB8_473
+; RV32-NEXT: .LBB8_234: # %else632
+; RV32-NEXT: slli a4, a2, 11
+; RV32-NEXT: bgez a4, .LBB8_235
+; RV32-NEXT: j .LBB8_474
+; RV32-NEXT: .LBB8_235: # %else635
+; RV32-NEXT: slli a4, a2, 10
+; RV32-NEXT: bgez a4, .LBB8_236
+; RV32-NEXT: j .LBB8_475
+; RV32-NEXT: .LBB8_236: # %else638
+; RV32-NEXT: slli a4, a2, 9
+; RV32-NEXT: bgez a4, .LBB8_237
+; RV32-NEXT: j .LBB8_476
+; RV32-NEXT: .LBB8_237: # %else641
+; RV32-NEXT: slli a4, a2, 8
+; RV32-NEXT: bgez a4, .LBB8_238
+; RV32-NEXT: j .LBB8_477
+; RV32-NEXT: .LBB8_238: # %else644
+; RV32-NEXT: slli a4, a2, 7
+; RV32-NEXT: bgez a4, .LBB8_239
+; RV32-NEXT: j .LBB8_478
+; RV32-NEXT: .LBB8_239: # %else647
+; RV32-NEXT: slli a4, a2, 6
+; RV32-NEXT: bgez a4, .LBB8_240
+; RV32-NEXT: j .LBB8_479
+; RV32-NEXT: .LBB8_240: # %else650
+; RV32-NEXT: slli a4, a2, 5
+; RV32-NEXT: bgez a4, .LBB8_241
+; RV32-NEXT: j .LBB8_480
+; RV32-NEXT: .LBB8_241: # %else653
+; RV32-NEXT: slli a4, a2, 4
+; RV32-NEXT: bgez a4, .LBB8_242
+; RV32-NEXT: j .LBB8_481
+; RV32-NEXT: .LBB8_242: # %else656
+; RV32-NEXT: slli a4, a2, 3
+; RV32-NEXT: bgez a4, .LBB8_243
+; RV32-NEXT: j .LBB8_482
+; RV32-NEXT: .LBB8_243: # %else659
+; RV32-NEXT: slli a4, a2, 2
+; RV32-NEXT: bgez a4, .LBB8_245
+; RV32-NEXT: .LBB8_244: # %cond.store661
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 381(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: .LBB8_245: # %else662
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a4, a2, 1
+; RV32-NEXT: vsrl.vx v8, v8, a1
+; RV32-NEXT: bgez a4, .LBB8_247
+; RV32-NEXT: # %bb.246: # %cond.store664
+; RV32-NEXT: li a1, 128
+; RV32-NEXT: li a4, 17
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a1, 254(a3)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_247: # %else665
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: bgez a2, .LBB8_249
+; RV32-NEXT: # %bb.248: # %cond.store667
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 128
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a2, 127(a3)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB8_249: # %else668
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: beqz a2, .LBB8_251
+; RV32-NEXT: # %bb.250: # %cond.store670
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a2, 0(a3)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: .LBB8_251: # %else671
+; RV32-NEXT: andi a3, a1, 2
+; RV32-NEXT: addi a2, sp, 2033
+; RV32-NEXT: beqz a3, .LBB8_252
+; RV32-NEXT: j .LBB8_483
+; RV32-NEXT: .LBB8_252: # %else674
+; RV32-NEXT: andi a3, a1, 4
+; RV32-NEXT: beqz a3, .LBB8_253
+; RV32-NEXT: j .LBB8_484
+; RV32-NEXT: .LBB8_253: # %else677
+; RV32-NEXT: andi a3, a1, 8
+; RV32-NEXT: beqz a3, .LBB8_254
+; RV32-NEXT: j .LBB8_485
+; RV32-NEXT: .LBB8_254: # %else680
+; RV32-NEXT: andi a3, a1, 16
+; RV32-NEXT: beqz a3, .LBB8_255
+; RV32-NEXT: j .LBB8_486
+; RV32-NEXT: .LBB8_255: # %else683
+; RV32-NEXT: andi a3, a1, 32
+; RV32-NEXT: beqz a3, .LBB8_256
+; RV32-NEXT: j .LBB8_487
+; RV32-NEXT: .LBB8_256: # %else686
+; RV32-NEXT: andi a3, a1, 64
+; RV32-NEXT: beqz a3, .LBB8_257
+; RV32-NEXT: j .LBB8_488
+; RV32-NEXT: .LBB8_257: # %else689
+; RV32-NEXT: andi a3, a1, 128
+; RV32-NEXT: beqz a3, .LBB8_258
+; RV32-NEXT: j .LBB8_489
+; RV32-NEXT: .LBB8_258: # %else692
+; RV32-NEXT: andi a3, a1, 256
+; RV32-NEXT: beqz a3, .LBB8_259
+; RV32-NEXT: j .LBB8_490
+; RV32-NEXT: .LBB8_259: # %else695
+; RV32-NEXT: andi a3, a1, 512
+; RV32-NEXT: beqz a3, .LBB8_260
+; RV32-NEXT: j .LBB8_491
+; RV32-NEXT: .LBB8_260: # %else698
+; RV32-NEXT: andi a3, a1, 1024
+; RV32-NEXT: beqz a3, .LBB8_261
+; RV32-NEXT: j .LBB8_492
+; RV32-NEXT: .LBB8_261: # %else701
+; RV32-NEXT: slli a3, a1, 20
+; RV32-NEXT: bgez a3, .LBB8_262
+; RV32-NEXT: j .LBB8_493
+; RV32-NEXT: .LBB8_262: # %else704
+; RV32-NEXT: slli a3, a1, 19
+; RV32-NEXT: bgez a3, .LBB8_263
+; RV32-NEXT: j .LBB8_494
+; RV32-NEXT: .LBB8_263: # %else707
+; RV32-NEXT: slli a3, a1, 18
+; RV32-NEXT: bgez a3, .LBB8_264
+; RV32-NEXT: j .LBB8_495
+; RV32-NEXT: .LBB8_264: # %else710
+; RV32-NEXT: slli a3, a1, 17
+; RV32-NEXT: bgez a3, .LBB8_265
+; RV32-NEXT: j .LBB8_496
+; RV32-NEXT: .LBB8_265: # %else713
+; RV32-NEXT: slli a3, a1, 16
+; RV32-NEXT: bgez a3, .LBB8_266
+; RV32-NEXT: j .LBB8_497
+; RV32-NEXT: .LBB8_266: # %else716
+; RV32-NEXT: slli a3, a1, 15
+; RV32-NEXT: bgez a3, .LBB8_267
+; RV32-NEXT: j .LBB8_498
+; RV32-NEXT: .LBB8_267: # %else719
+; RV32-NEXT: slli a3, a1, 14
+; RV32-NEXT: bgez a3, .LBB8_268
+; RV32-NEXT: j .LBB8_499
+; RV32-NEXT: .LBB8_268: # %else722
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bgez a2, .LBB8_269
+; RV32-NEXT: j .LBB8_500
+; RV32-NEXT: .LBB8_269: # %else725
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bgez a2, .LBB8_270
+; RV32-NEXT: j .LBB8_501
+; RV32-NEXT: .LBB8_270: # %else728
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bgez a2, .LBB8_271
+; RV32-NEXT: j .LBB8_502
+; RV32-NEXT: .LBB8_271: # %else731
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bgez a2, .LBB8_272
+; RV32-NEXT: j .LBB8_503
+; RV32-NEXT: .LBB8_272: # %else734
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bgez a2, .LBB8_273
+; RV32-NEXT: j .LBB8_504
+; RV32-NEXT: .LBB8_273: # %else737
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bgez a2, .LBB8_274
+; RV32-NEXT: j .LBB8_505
+; RV32-NEXT: .LBB8_274: # %else740
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bgez a2, .LBB8_275
+; RV32-NEXT: j .LBB8_506
+; RV32-NEXT: .LBB8_275: # %else743
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bgez a2, .LBB8_276
+; RV32-NEXT: j .LBB8_507
+; RV32-NEXT: .LBB8_276: # %else746
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bgez a2, .LBB8_277
+; RV32-NEXT: j .LBB8_508
+; RV32-NEXT: .LBB8_277: # %else749
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bgez a2, .LBB8_278
+; RV32-NEXT: j .LBB8_509
+; RV32-NEXT: .LBB8_278: # %else752
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bgez a2, .LBB8_279
+; RV32-NEXT: j .LBB8_510
+; RV32-NEXT: .LBB8_279: # %else755
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bgez a2, .LBB8_280
+; RV32-NEXT: j .LBB8_511
+; RV32-NEXT: .LBB8_280: # %else758
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bgez a2, .LBB8_281
+; RV32-NEXT: j .LBB8_512
+; RV32-NEXT: .LBB8_281: # %else761
+; RV32-NEXT: bgez a1, .LBB8_283
+; RV32-NEXT: .LBB8_282: # %cond.store763
+; RV32-NEXT: li a1, 128
+; RV32-NEXT: addi a2, sp, 128
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a2)
+; RV32-NEXT: lbu a1, 255(sp)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: .LBB8_283: # %else764
+; RV32-NEXT: lui a0, 6
+; RV32-NEXT: addi a0, a0, 256
+; RV32-NEXT: sub sp, s0, a0
+; RV32-NEXT: lui a0, 6
+; RV32-NEXT: addi a0, a0, -1776
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 2032
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB8_284: # %cond.store
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v16, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 2
+; RV32-NEXT: bnez a1, .LBB8_285
+; RV32-NEXT: j .LBB8_2
+; RV32-NEXT: .LBB8_285: # %cond.store1
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 4
+; RV32-NEXT: bnez a1, .LBB8_286
+; RV32-NEXT: j .LBB8_3
+; RV32-NEXT: .LBB8_286: # %cond.store4
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 2
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 8
+; RV32-NEXT: bnez a1, .LBB8_287
+; RV32-NEXT: j .LBB8_4
+; RV32-NEXT: .LBB8_287: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 3
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 16
+; RV32-NEXT: bnez a1, .LBB8_288
+; RV32-NEXT: j .LBB8_5
+; RV32-NEXT: .LBB8_288: # %cond.store10
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 4
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 32
+; RV32-NEXT: bnez a1, .LBB8_289
+; RV32-NEXT: j .LBB8_6
+; RV32-NEXT: .LBB8_289: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 5
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 64
+; RV32-NEXT: bnez a1, .LBB8_290
+; RV32-NEXT: j .LBB8_7
+; RV32-NEXT: .LBB8_290: # %cond.store16
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 6
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 128
+; RV32-NEXT: bnez a1, .LBB8_291
+; RV32-NEXT: j .LBB8_8
+; RV32-NEXT: .LBB8_291: # %cond.store19
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 7
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 256
+; RV32-NEXT: bnez a1, .LBB8_292
+; RV32-NEXT: j .LBB8_9
+; RV32-NEXT: .LBB8_292: # %cond.store22
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 8
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 512
+; RV32-NEXT: bnez a1, .LBB8_293
+; RV32-NEXT: j .LBB8_10
+; RV32-NEXT: .LBB8_293: # %cond.store25
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 9
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a1, a2, 1024
+; RV32-NEXT: bnez a1, .LBB8_294
+; RV32-NEXT: j .LBB8_11
+; RV32-NEXT: .LBB8_294: # %cond.store28
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 10
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 20
+; RV32-NEXT: bltz a1, .LBB8_295
+; RV32-NEXT: j .LBB8_12
+; RV32-NEXT: .LBB8_295: # %cond.store31
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 11
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 19
+; RV32-NEXT: bltz a1, .LBB8_296
+; RV32-NEXT: j .LBB8_13
+; RV32-NEXT: .LBB8_296: # %cond.store34
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 12
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 18
+; RV32-NEXT: bltz a1, .LBB8_297
+; RV32-NEXT: j .LBB8_14
+; RV32-NEXT: .LBB8_297: # %cond.store37
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 13
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 17
+; RV32-NEXT: bltz a1, .LBB8_298
+; RV32-NEXT: j .LBB8_15
+; RV32-NEXT: .LBB8_298: # %cond.store40
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 14
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 16
+; RV32-NEXT: bltz a1, .LBB8_299
+; RV32-NEXT: j .LBB8_16
+; RV32-NEXT: .LBB8_299: # %cond.store43
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v16, 15
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 15
+; RV32-NEXT: bltz a1, .LBB8_300
+; RV32-NEXT: j .LBB8_17
+; RV32-NEXT: .LBB8_300: # %cond.store46
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 16
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 14
+; RV32-NEXT: bltz a1, .LBB8_301
+; RV32-NEXT: j .LBB8_18
+; RV32-NEXT: .LBB8_301: # %cond.store49
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 17
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 13
+; RV32-NEXT: bltz a1, .LBB8_302
+; RV32-NEXT: j .LBB8_19
+; RV32-NEXT: .LBB8_302: # %cond.store52
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 18
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 12
+; RV32-NEXT: bltz a1, .LBB8_303
+; RV32-NEXT: j .LBB8_20
+; RV32-NEXT: .LBB8_303: # %cond.store55
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 19
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 11
+; RV32-NEXT: bltz a1, .LBB8_304
+; RV32-NEXT: j .LBB8_21
+; RV32-NEXT: .LBB8_304: # %cond.store58
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 20
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 10
+; RV32-NEXT: bltz a1, .LBB8_305
+; RV32-NEXT: j .LBB8_22
+; RV32-NEXT: .LBB8_305: # %cond.store61
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 21
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 9
+; RV32-NEXT: bltz a1, .LBB8_306
+; RV32-NEXT: j .LBB8_23
+; RV32-NEXT: .LBB8_306: # %cond.store64
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 22
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 8
+; RV32-NEXT: bltz a1, .LBB8_307
+; RV32-NEXT: j .LBB8_24
+; RV32-NEXT: .LBB8_307: # %cond.store67
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 23
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 7
+; RV32-NEXT: bltz a1, .LBB8_308
+; RV32-NEXT: j .LBB8_25
+; RV32-NEXT: .LBB8_308: # %cond.store70
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 24
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 6
+; RV32-NEXT: bltz a1, .LBB8_309
+; RV32-NEXT: j .LBB8_26
+; RV32-NEXT: .LBB8_309: # %cond.store73
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 25
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 5
+; RV32-NEXT: bltz a1, .LBB8_310
+; RV32-NEXT: j .LBB8_27
+; RV32-NEXT: .LBB8_310: # %cond.store76
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 26
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 4
+; RV32-NEXT: bltz a1, .LBB8_311
+; RV32-NEXT: j .LBB8_28
+; RV32-NEXT: .LBB8_311: # %cond.store79
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v16, 27
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a1, a2, 3
+; RV32-NEXT: bgez a1, .LBB8_513
+; RV32-NEXT: j .LBB8_29
+; RV32-NEXT: .LBB8_513: # %cond.store79
+; RV32-NEXT: j .LBB8_30
+; RV32-NEXT: .LBB8_312: # %cond.store94
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1016(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 2
+; RV32-NEXT: bnez a4, .LBB8_313
+; RV32-NEXT: j .LBB8_38
+; RV32-NEXT: .LBB8_313: # %cond.store97
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 889(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 4
+; RV32-NEXT: bnez a4, .LBB8_314
+; RV32-NEXT: j .LBB8_39
+; RV32-NEXT: .LBB8_314: # %cond.store100
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 762(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 8
+; RV32-NEXT: bnez a4, .LBB8_315
+; RV32-NEXT: j .LBB8_40
+; RV32-NEXT: .LBB8_315: # %cond.store103
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 635(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 16
+; RV32-NEXT: bnez a4, .LBB8_316
+; RV32-NEXT: j .LBB8_41
+; RV32-NEXT: .LBB8_316: # %cond.store106
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -512
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 508(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 32
+; RV32-NEXT: bnez a4, .LBB8_317
+; RV32-NEXT: j .LBB8_42
+; RV32-NEXT: .LBB8_317: # %cond.store109
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 381(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 64
+; RV32-NEXT: bnez a4, .LBB8_318
+; RV32-NEXT: j .LBB8_43
+; RV32-NEXT: .LBB8_318: # %cond.store112
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 254(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 128
+; RV32-NEXT: bnez a4, .LBB8_319
+; RV32-NEXT: j .LBB8_44
+; RV32-NEXT: .LBB8_319: # %cond.store115
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 127(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 256
+; RV32-NEXT: beqz a4, .LBB8_514
+; RV32-NEXT: j .LBB8_45
+; RV32-NEXT: .LBB8_514: # %cond.store115
+; RV32-NEXT: j .LBB8_46
+; RV32-NEXT: .LBB8_320: # %cond.store121
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 2032(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: andi a4, a3, 1024
+; RV32-NEXT: bnez a4, .LBB8_321
+; RV32-NEXT: j .LBB8_48
+; RV32-NEXT: .LBB8_321: # %cond.store124
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1905(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 20
+; RV32-NEXT: bltz a4, .LBB8_322
+; RV32-NEXT: j .LBB8_49
+; RV32-NEXT: .LBB8_322: # %cond.store127
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1778(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 19
+; RV32-NEXT: bltz a4, .LBB8_323
+; RV32-NEXT: j .LBB8_50
+; RV32-NEXT: .LBB8_323: # %cond.store130
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1536
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1651(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 18
+; RV32-NEXT: bltz a4, .LBB8_324
+; RV32-NEXT: j .LBB8_51
+; RV32-NEXT: .LBB8_324: # %cond.store133
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1524(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 17
+; RV32-NEXT: bltz a4, .LBB8_325
+; RV32-NEXT: j .LBB8_52
+; RV32-NEXT: .LBB8_325: # %cond.store136
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1397(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: bltz a4, .LBB8_326
+; RV32-NEXT: j .LBB8_53
+; RV32-NEXT: .LBB8_326: # %cond.store139
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 6
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1270(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: bltz a4, .LBB8_327
+; RV32-NEXT: j .LBB8_54
+; RV32-NEXT: .LBB8_327: # %cond.store142
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 11
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1143(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bltz a4, .LBB8_328
+; RV32-NEXT: j .LBB8_55
+; RV32-NEXT: .LBB8_328: # %cond.store145
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1016(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bltz a4, .LBB8_329
+; RV32-NEXT: j .LBB8_56
+; RV32-NEXT: .LBB8_329: # %cond.store148
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 889(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bltz a4, .LBB8_330
+; RV32-NEXT: j .LBB8_57
+; RV32-NEXT: .LBB8_330: # %cond.store151
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 762(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bltz a4, .LBB8_331
+; RV32-NEXT: j .LBB8_58
+; RV32-NEXT: .LBB8_331: # %cond.store154
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1536
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 635(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bltz a4, .LBB8_332
+; RV32-NEXT: j .LBB8_59
+; RV32-NEXT: .LBB8_332: # %cond.store157
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 508(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bltz a4, .LBB8_333
+; RV32-NEXT: j .LBB8_60
+; RV32-NEXT: .LBB8_333: # %cond.store160
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 381(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bltz a4, .LBB8_334
+; RV32-NEXT: j .LBB8_61
+; RV32-NEXT: .LBB8_334: # %cond.store163
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 254(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bltz a4, .LBB8_335
+; RV32-NEXT: j .LBB8_62
+; RV32-NEXT: .LBB8_335: # %cond.store166
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 21
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 127(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bgez a4, .LBB8_515
+; RV32-NEXT: j .LBB8_63
+; RV32-NEXT: .LBB8_515: # %cond.store166
+; RV32-NEXT: j .LBB8_64
+; RV32-NEXT: .LBB8_336: # %cond.store172
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 2032(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a2, a3, 4
+; RV32-NEXT: bltz a2, .LBB8_337
+; RV32-NEXT: j .LBB8_66
+; RV32-NEXT: .LBB8_337: # %cond.store175
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1905(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a2, a3, 3
+; RV32-NEXT: bltz a2, .LBB8_338
+; RV32-NEXT: j .LBB8_67
+; RV32-NEXT: .LBB8_338: # %cond.store178
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 512
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1778(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a2, a3, 2
+; RV32-NEXT: bgez a2, .LBB8_516
+; RV32-NEXT: j .LBB8_68
+; RV32-NEXT: .LBB8_516: # %cond.store178
+; RV32-NEXT: j .LBB8_69
+; RV32-NEXT: .LBB8_339: # %cond.store187
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, 128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1397(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: bnez a3, .LBB8_340
+; RV32-NEXT: j .LBB8_73
+; RV32-NEXT: .LBB8_340: # %cond.store190
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1270(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 2
+; RV32-NEXT: bnez a3, .LBB8_341
+; RV32-NEXT: j .LBB8_74
+; RV32-NEXT: .LBB8_341: # %cond.store193
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1143(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 4
+; RV32-NEXT: bnez a3, .LBB8_342
+; RV32-NEXT: j .LBB8_75
+; RV32-NEXT: .LBB8_342: # %cond.store196
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1016(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 8
+; RV32-NEXT: bnez a3, .LBB8_343
+; RV32-NEXT: j .LBB8_76
+; RV32-NEXT: .LBB8_343: # %cond.store199
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 889(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 16
+; RV32-NEXT: bnez a3, .LBB8_344
+; RV32-NEXT: j .LBB8_77
+; RV32-NEXT: .LBB8_344: # %cond.store202
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -512
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 762(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 32
+; RV32-NEXT: bnez a3, .LBB8_345
+; RV32-NEXT: j .LBB8_78
+; RV32-NEXT: .LBB8_345: # %cond.store205
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 635(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 64
+; RV32-NEXT: bnez a3, .LBB8_346
+; RV32-NEXT: j .LBB8_79
+; RV32-NEXT: .LBB8_346: # %cond.store208
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 508(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 128
+; RV32-NEXT: bnez a3, .LBB8_347
+; RV32-NEXT: j .LBB8_80
+; RV32-NEXT: .LBB8_347: # %cond.store211
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 381(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 256
+; RV32-NEXT: bnez a3, .LBB8_348
+; RV32-NEXT: j .LBB8_81
+; RV32-NEXT: .LBB8_348: # %cond.store214
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 19
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 254(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: bnez a3, .LBB8_349
+; RV32-NEXT: j .LBB8_82
+; RV32-NEXT: .LBB8_349: # %cond.store217
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 127(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: beqz a3, .LBB8_517
+; RV32-NEXT: j .LBB8_83
+; RV32-NEXT: .LBB8_517: # %cond.store217
+; RV32-NEXT: j .LBB8_84
+; RV32-NEXT: .LBB8_350: # %cond.store223
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 2032(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 19
+; RV32-NEXT: bltz a4, .LBB8_351
+; RV32-NEXT: j .LBB8_86
+; RV32-NEXT: .LBB8_351: # %cond.store226
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1536
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1905(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 18
+; RV32-NEXT: bltz a4, .LBB8_352
+; RV32-NEXT: j .LBB8_87
+; RV32-NEXT: .LBB8_352: # %cond.store229
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1778(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 17
+; RV32-NEXT: bltz a4, .LBB8_353
+; RV32-NEXT: j .LBB8_88
+; RV32-NEXT: .LBB8_353: # %cond.store232
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1651(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 16
+; RV32-NEXT: bltz a4, .LBB8_354
+; RV32-NEXT: j .LBB8_89
+; RV32-NEXT: .LBB8_354: # %cond.store235
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 5
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1524(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 15
+; RV32-NEXT: bltz a4, .LBB8_355
+; RV32-NEXT: j .LBB8_90
+; RV32-NEXT: .LBB8_355: # %cond.store238
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 9
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1397(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 14
+; RV32-NEXT: bltz a4, .LBB8_356
+; RV32-NEXT: j .LBB8_91
+; RV32-NEXT: .LBB8_356: # %cond.store241
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1270(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 13
+; RV32-NEXT: bltz a4, .LBB8_357
+; RV32-NEXT: j .LBB8_92
+; RV32-NEXT: .LBB8_357: # %cond.store244
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1143(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 12
+; RV32-NEXT: bltz a4, .LBB8_358
+; RV32-NEXT: j .LBB8_93
+; RV32-NEXT: .LBB8_358: # %cond.store247
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1016(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 11
+; RV32-NEXT: bltz a4, .LBB8_359
+; RV32-NEXT: j .LBB8_94
+; RV32-NEXT: .LBB8_359: # %cond.store250
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1536
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 889(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 10
+; RV32-NEXT: bltz a4, .LBB8_360
+; RV32-NEXT: j .LBB8_95
+; RV32-NEXT: .LBB8_360: # %cond.store253
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 762(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 9
+; RV32-NEXT: bltz a4, .LBB8_361
+; RV32-NEXT: j .LBB8_96
+; RV32-NEXT: .LBB8_361: # %cond.store256
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 635(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 8
+; RV32-NEXT: bltz a4, .LBB8_362
+; RV32-NEXT: j .LBB8_97
+; RV32-NEXT: .LBB8_362: # %cond.store259
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 508(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 7
+; RV32-NEXT: bltz a4, .LBB8_363
+; RV32-NEXT: j .LBB8_98
+; RV32-NEXT: .LBB8_363: # %cond.store262
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 17
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 381(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 6
+; RV32-NEXT: bltz a4, .LBB8_364
+; RV32-NEXT: j .LBB8_99
+; RV32-NEXT: .LBB8_364: # %cond.store265
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 254(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 5
+; RV32-NEXT: bltz a4, .LBB8_365
+; RV32-NEXT: j .LBB8_100
+; RV32-NEXT: .LBB8_365: # %cond.store268
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 127(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 4
+; RV32-NEXT: bgez a4, .LBB8_518
+; RV32-NEXT: j .LBB8_101
+; RV32-NEXT: .LBB8_518: # %cond.store268
+; RV32-NEXT: j .LBB8_102
+; RV32-NEXT: .LBB8_366: # %cond.store283
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, 128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1651(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 1
+; RV32-NEXT: bnez a2, .LBB8_367
+; RV32-NEXT: j .LBB8_110
+; RV32-NEXT: .LBB8_367: # %cond.store286
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1524(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 2
+; RV32-NEXT: bnez a2, .LBB8_368
+; RV32-NEXT: j .LBB8_111
+; RV32-NEXT: .LBB8_368: # %cond.store289
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1397(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 4
+; RV32-NEXT: bnez a2, .LBB8_369
+; RV32-NEXT: j .LBB8_112
+; RV32-NEXT: .LBB8_369: # %cond.store292
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1270(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 8
+; RV32-NEXT: bnez a2, .LBB8_370
+; RV32-NEXT: j .LBB8_113
+; RV32-NEXT: .LBB8_370: # %cond.store295
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1143(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 16
+; RV32-NEXT: bnez a2, .LBB8_371
+; RV32-NEXT: j .LBB8_114
+; RV32-NEXT: .LBB8_371: # %cond.store298
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: li a5, 31
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 1016(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 32
+; RV32-NEXT: bnez a2, .LBB8_372
+; RV32-NEXT: j .LBB8_115
+; RV32-NEXT: .LBB8_372: # %cond.store301
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 889(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 64
+; RV32-NEXT: bnez a2, .LBB8_373
+; RV32-NEXT: j .LBB8_116
+; RV32-NEXT: .LBB8_373: # %cond.store304
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 762(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 128
+; RV32-NEXT: bnez a2, .LBB8_374
+; RV32-NEXT: j .LBB8_117
+; RV32-NEXT: .LBB8_374: # %cond.store307
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 635(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 256
+; RV32-NEXT: bnez a2, .LBB8_375
+; RV32-NEXT: j .LBB8_118
+; RV32-NEXT: .LBB8_375: # %cond.store310
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: li a5, 15
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 508(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 512
+; RV32-NEXT: bnez a2, .LBB8_376
+; RV32-NEXT: j .LBB8_119
+; RV32-NEXT: .LBB8_376: # %cond.store313
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 381(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a2, a3, 1024
+; RV32-NEXT: bnez a2, .LBB8_377
+; RV32-NEXT: j .LBB8_120
+; RV32-NEXT: .LBB8_377: # %cond.store316
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 254(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a2, a3, 20
+; RV32-NEXT: bltz a2, .LBB8_378
+; RV32-NEXT: j .LBB8_121
+; RV32-NEXT: .LBB8_378: # %cond.store319
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a2, 127(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a2, a3, 19
+; RV32-NEXT: bgez a2, .LBB8_519
+; RV32-NEXT: j .LBB8_122
+; RV32-NEXT: .LBB8_519: # %cond.store319
+; RV32-NEXT: j .LBB8_123
+; RV32-NEXT: .LBB8_379: # %cond.store325
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 2032(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 17
+; RV32-NEXT: bltz a4, .LBB8_380
+; RV32-NEXT: j .LBB8_125
+; RV32-NEXT: .LBB8_380: # %cond.store328
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1905(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: bltz a4, .LBB8_381
+; RV32-NEXT: j .LBB8_126
+; RV32-NEXT: .LBB8_381: # %cond.store331
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 4
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1778(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: bltz a4, .LBB8_382
+; RV32-NEXT: j .LBB8_127
+; RV32-NEXT: .LBB8_382: # %cond.store334
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 7
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1651(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bltz a4, .LBB8_383
+; RV32-NEXT: j .LBB8_128
+; RV32-NEXT: .LBB8_383: # %cond.store337
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1524(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bltz a4, .LBB8_384
+; RV32-NEXT: j .LBB8_129
+; RV32-NEXT: .LBB8_384: # %cond.store340
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1397(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bltz a4, .LBB8_385
+; RV32-NEXT: j .LBB8_130
+; RV32-NEXT: .LBB8_385: # %cond.store343
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1270(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bltz a4, .LBB8_386
+; RV32-NEXT: j .LBB8_131
+; RV32-NEXT: .LBB8_386: # %cond.store346
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 27
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1143(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bltz a4, .LBB8_387
+; RV32-NEXT: j .LBB8_132
+; RV32-NEXT: .LBB8_387: # %cond.store349
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 1016(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bltz a4, .LBB8_388
+; RV32-NEXT: j .LBB8_133
+; RV32-NEXT: .LBB8_388: # %cond.store352
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 889(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bltz a4, .LBB8_389
+; RV32-NEXT: j .LBB8_134
+; RV32-NEXT: .LBB8_389: # %cond.store355
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 762(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bltz a4, .LBB8_390
+; RV32-NEXT: j .LBB8_135
+; RV32-NEXT: .LBB8_390: # %cond.store358
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 13
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 635(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bltz a4, .LBB8_391
+; RV32-NEXT: j .LBB8_136
+; RV32-NEXT: .LBB8_391: # %cond.store361
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 508(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 5
+; RV32-NEXT: bltz a4, .LBB8_392
+; RV32-NEXT: j .LBB8_137
+; RV32-NEXT: .LBB8_392: # %cond.store364
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 381(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: bltz a4, .LBB8_393
+; RV32-NEXT: j .LBB8_138
+; RV32-NEXT: .LBB8_393: # %cond.store367
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 254(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: bltz a4, .LBB8_394
+; RV32-NEXT: j .LBB8_139
+; RV32-NEXT: .LBB8_394: # %cond.store370
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 25
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a4, 127(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: bgez a4, .LBB8_520
+; RV32-NEXT: j .LBB8_140
+; RV32-NEXT: .LBB8_520: # %cond.store370
+; RV32-NEXT: j .LBB8_141
+; RV32-NEXT: .LBB8_395: # %cond.store379
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, 128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a5)
+; RV32-NEXT: lbu a3, 1874(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a4, 1
+; RV32-NEXT: bnez a3, .LBB8_396
+; RV32-NEXT: j .LBB8_145
+; RV32-NEXT: .LBB8_396: # %cond.store382
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v24, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 2
+; RV32-NEXT: bnez a3, .LBB8_397
+; RV32-NEXT: j .LBB8_146
+; RV32-NEXT: .LBB8_397: # %cond.store385
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 1
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 4
+; RV32-NEXT: bnez a3, .LBB8_398
+; RV32-NEXT: j .LBB8_147
+; RV32-NEXT: .LBB8_398: # %cond.store388
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 2
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 8
+; RV32-NEXT: bnez a3, .LBB8_399
+; RV32-NEXT: j .LBB8_148
+; RV32-NEXT: .LBB8_399: # %cond.store391
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 3
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 16
+; RV32-NEXT: bnez a3, .LBB8_400
+; RV32-NEXT: j .LBB8_149
+; RV32-NEXT: .LBB8_400: # %cond.store394
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 4
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 32
+; RV32-NEXT: bnez a3, .LBB8_401
+; RV32-NEXT: j .LBB8_150
+; RV32-NEXT: .LBB8_401: # %cond.store397
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 5
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 64
+; RV32-NEXT: bnez a3, .LBB8_402
+; RV32-NEXT: j .LBB8_151
+; RV32-NEXT: .LBB8_402: # %cond.store400
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 6
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 128
+; RV32-NEXT: bnez a3, .LBB8_403
+; RV32-NEXT: j .LBB8_152
+; RV32-NEXT: .LBB8_403: # %cond.store403
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 7
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 256
+; RV32-NEXT: bnez a3, .LBB8_404
+; RV32-NEXT: j .LBB8_153
+; RV32-NEXT: .LBB8_404: # %cond.store406
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 8
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 512
+; RV32-NEXT: bnez a3, .LBB8_405
+; RV32-NEXT: j .LBB8_154
+; RV32-NEXT: .LBB8_405: # %cond.store409
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 9
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: andi a3, a4, 1024
+; RV32-NEXT: bnez a3, .LBB8_406
+; RV32-NEXT: j .LBB8_155
+; RV32-NEXT: .LBB8_406: # %cond.store412
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 10
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 20
+; RV32-NEXT: bltz a3, .LBB8_407
+; RV32-NEXT: j .LBB8_156
+; RV32-NEXT: .LBB8_407: # %cond.store415
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 11
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 19
+; RV32-NEXT: bltz a3, .LBB8_408
+; RV32-NEXT: j .LBB8_157
+; RV32-NEXT: .LBB8_408: # %cond.store418
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 12
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 18
+; RV32-NEXT: bltz a3, .LBB8_409
+; RV32-NEXT: j .LBB8_158
+; RV32-NEXT: .LBB8_409: # %cond.store421
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 13
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 17
+; RV32-NEXT: bltz a3, .LBB8_410
+; RV32-NEXT: j .LBB8_159
+; RV32-NEXT: .LBB8_410: # %cond.store424
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 14
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 16
+; RV32-NEXT: bltz a3, .LBB8_411
+; RV32-NEXT: j .LBB8_160
+; RV32-NEXT: .LBB8_411: # %cond.store427
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v24, 15
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 15
+; RV32-NEXT: bltz a3, .LBB8_412
+; RV32-NEXT: j .LBB8_161
+; RV32-NEXT: .LBB8_412: # %cond.store430
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 16
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 14
+; RV32-NEXT: bltz a3, .LBB8_413
+; RV32-NEXT: j .LBB8_162
+; RV32-NEXT: .LBB8_413: # %cond.store433
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 17
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 13
+; RV32-NEXT: bltz a3, .LBB8_414
+; RV32-NEXT: j .LBB8_163
+; RV32-NEXT: .LBB8_414: # %cond.store436
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 18
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 12
+; RV32-NEXT: bltz a3, .LBB8_415
+; RV32-NEXT: j .LBB8_164
+; RV32-NEXT: .LBB8_415: # %cond.store439
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 19
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 11
+; RV32-NEXT: bltz a3, .LBB8_416
+; RV32-NEXT: j .LBB8_165
+; RV32-NEXT: .LBB8_416: # %cond.store442
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 20
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 10
+; RV32-NEXT: bltz a3, .LBB8_417
+; RV32-NEXT: j .LBB8_166
+; RV32-NEXT: .LBB8_417: # %cond.store445
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 21
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 9
+; RV32-NEXT: bltz a3, .LBB8_418
+; RV32-NEXT: j .LBB8_167
+; RV32-NEXT: .LBB8_418: # %cond.store448
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 22
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 8
+; RV32-NEXT: bltz a3, .LBB8_419
+; RV32-NEXT: j .LBB8_168
+; RV32-NEXT: .LBB8_419: # %cond.store451
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 23
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 7
+; RV32-NEXT: bltz a3, .LBB8_420
+; RV32-NEXT: j .LBB8_169
+; RV32-NEXT: .LBB8_420: # %cond.store454
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 24
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 6
+; RV32-NEXT: bltz a3, .LBB8_421
+; RV32-NEXT: j .LBB8_170
+; RV32-NEXT: .LBB8_421: # %cond.store457
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 25
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 5
+; RV32-NEXT: bltz a3, .LBB8_422
+; RV32-NEXT: j .LBB8_171
+; RV32-NEXT: .LBB8_422: # %cond.store460
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 26
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 4
+; RV32-NEXT: bltz a3, .LBB8_423
+; RV32-NEXT: j .LBB8_172
+; RV32-NEXT: .LBB8_423: # %cond.store463
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 27
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 3
+; RV32-NEXT: bltz a3, .LBB8_424
+; RV32-NEXT: j .LBB8_173
+; RV32-NEXT: .LBB8_424: # %cond.store466
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 28
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a3, a4, 2
+; RV32-NEXT: bgez a3, .LBB8_521
+; RV32-NEXT: j .LBB8_174
+; RV32-NEXT: .LBB8_521: # %cond.store466
+; RV32-NEXT: j .LBB8_175
+; RV32-NEXT: .LBB8_425: # %cond.store475
+; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v24, 31
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a4, a3, 1
+; RV32-NEXT: bnez a4, .LBB8_426
+; RV32-NEXT: j .LBB8_179
+; RV32-NEXT: .LBB8_426: # %cond.store478
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1651(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 2
+; RV32-NEXT: bnez a4, .LBB8_427
+; RV32-NEXT: j .LBB8_180
+; RV32-NEXT: .LBB8_427: # %cond.store481
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1524(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 4
+; RV32-NEXT: bnez a4, .LBB8_428
+; RV32-NEXT: j .LBB8_181
+; RV32-NEXT: .LBB8_428: # %cond.store484
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1397(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 8
+; RV32-NEXT: bnez a4, .LBB8_429
+; RV32-NEXT: j .LBB8_182
+; RV32-NEXT: .LBB8_429: # %cond.store487
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1270(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 16
+; RV32-NEXT: bnez a4, .LBB8_430
+; RV32-NEXT: j .LBB8_183
+; RV32-NEXT: .LBB8_430: # %cond.store490
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 23
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1143(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 32
+; RV32-NEXT: bnez a4, .LBB8_431
+; RV32-NEXT: j .LBB8_184
+; RV32-NEXT: .LBB8_431: # %cond.store493
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1016(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 64
+; RV32-NEXT: bnez a4, .LBB8_432
+; RV32-NEXT: j .LBB8_185
+; RV32-NEXT: .LBB8_432: # %cond.store496
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 889(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 128
+; RV32-NEXT: bnez a4, .LBB8_433
+; RV32-NEXT: j .LBB8_186
+; RV32-NEXT: .LBB8_433: # %cond.store499
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 762(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 256
+; RV32-NEXT: bnez a4, .LBB8_434
+; RV32-NEXT: j .LBB8_187
+; RV32-NEXT: .LBB8_434: # %cond.store502
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 11
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 635(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 512
+; RV32-NEXT: bnez a4, .LBB8_435
+; RV32-NEXT: j .LBB8_188
+; RV32-NEXT: .LBB8_435: # %cond.store505
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 508(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a4, a3, 1024
+; RV32-NEXT: bnez a4, .LBB8_436
+; RV32-NEXT: j .LBB8_189
+; RV32-NEXT: .LBB8_436: # %cond.store508
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 381(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 20
+; RV32-NEXT: bltz a4, .LBB8_437
+; RV32-NEXT: j .LBB8_190
+; RV32-NEXT: .LBB8_437: # %cond.store511
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 254(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 19
+; RV32-NEXT: bltz a4, .LBB8_438
+; RV32-NEXT: j .LBB8_191
+; RV32-NEXT: .LBB8_438: # %cond.store514
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 21
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 127(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 18
+; RV32-NEXT: bgez a4, .LBB8_522
+; RV32-NEXT: j .LBB8_192
+; RV32-NEXT: .LBB8_522: # %cond.store514
+; RV32-NEXT: j .LBB8_193
+; RV32-NEXT: .LBB8_439: # %cond.store520
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 2032(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: bltz a4, .LBB8_440
+; RV32-NEXT: j .LBB8_195
+; RV32-NEXT: .LBB8_440: # %cond.store523
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1905(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: bltz a4, .LBB8_441
+; RV32-NEXT: j .LBB8_196
+; RV32-NEXT: .LBB8_441: # %cond.store526
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1778(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bltz a4, .LBB8_442
+; RV32-NEXT: j .LBB8_197
+; RV32-NEXT: .LBB8_442: # %cond.store529
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1651(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bltz a4, .LBB8_443
+; RV32-NEXT: j .LBB8_198
+; RV32-NEXT: .LBB8_443: # %cond.store532
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1524(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bltz a4, .LBB8_444
+; RV32-NEXT: j .LBB8_199
+; RV32-NEXT: .LBB8_444: # %cond.store535
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1397(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bltz a4, .LBB8_445
+; RV32-NEXT: j .LBB8_200
+; RV32-NEXT: .LBB8_445: # %cond.store538
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 19
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1270(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bltz a4, .LBB8_446
+; RV32-NEXT: j .LBB8_201
+; RV32-NEXT: .LBB8_446: # %cond.store541
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1143(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bltz a4, .LBB8_447
+; RV32-NEXT: j .LBB8_202
+; RV32-NEXT: .LBB8_447: # %cond.store544
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1016(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bltz a4, .LBB8_448
+; RV32-NEXT: j .LBB8_203
+; RV32-NEXT: .LBB8_448: # %cond.store547
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 889(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bltz a4, .LBB8_449
+; RV32-NEXT: j .LBB8_204
+; RV32-NEXT: .LBB8_449: # %cond.store550
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 9
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 762(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bltz a4, .LBB8_450
+; RV32-NEXT: j .LBB8_205
+; RV32-NEXT: .LBB8_450: # %cond.store553
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 635(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 5
+; RV32-NEXT: bltz a4, .LBB8_451
+; RV32-NEXT: j .LBB8_206
+; RV32-NEXT: .LBB8_451: # %cond.store556
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 508(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: bltz a4, .LBB8_452
+; RV32-NEXT: j .LBB8_207
+; RV32-NEXT: .LBB8_452: # %cond.store559
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 381(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: bltz a4, .LBB8_453
+; RV32-NEXT: j .LBB8_208
+; RV32-NEXT: .LBB8_453: # %cond.store562
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 17
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 254(a2)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: bgez a4, .LBB8_523
+; RV32-NEXT: j .LBB8_209
+; RV32-NEXT: .LBB8_523: # %cond.store562
+; RV32-NEXT: j .LBB8_210
+; RV32-NEXT: .LBB8_454: # %cond.store571
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 2032(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: bnez a3, .LBB8_455
+; RV32-NEXT: j .LBB8_214
+; RV32-NEXT: .LBB8_455: # %cond.store574
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1905(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 2
+; RV32-NEXT: bnez a3, .LBB8_456
+; RV32-NEXT: j .LBB8_215
+; RV32-NEXT: .LBB8_456: # %cond.store577
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1778(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 4
+; RV32-NEXT: bnez a3, .LBB8_457
+; RV32-NEXT: j .LBB8_216
+; RV32-NEXT: .LBB8_457: # %cond.store580
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 31
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1651(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 8
+; RV32-NEXT: bnez a3, .LBB8_458
+; RV32-NEXT: j .LBB8_217
+; RV32-NEXT: .LBB8_458: # %cond.store583
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1524(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 16
+; RV32-NEXT: bnez a3, .LBB8_459
+; RV32-NEXT: j .LBB8_218
+; RV32-NEXT: .LBB8_459: # %cond.store586
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 15
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1397(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 32
+; RV32-NEXT: bnez a3, .LBB8_460
+; RV32-NEXT: j .LBB8_219
+; RV32-NEXT: .LBB8_460: # %cond.store589
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1270(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 64
+; RV32-NEXT: bnez a3, .LBB8_461
+; RV32-NEXT: j .LBB8_220
+; RV32-NEXT: .LBB8_461: # %cond.store592
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 29
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1143(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 128
+; RV32-NEXT: bnez a3, .LBB8_462
+; RV32-NEXT: j .LBB8_221
+; RV32-NEXT: .LBB8_462: # %cond.store595
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 1016(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 256
+; RV32-NEXT: bnez a3, .LBB8_463
+; RV32-NEXT: j .LBB8_222
+; RV32-NEXT: .LBB8_463: # %cond.store598
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 7
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 889(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: bnez a3, .LBB8_464
+; RV32-NEXT: j .LBB8_223
+; RV32-NEXT: .LBB8_464: # %cond.store601
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 762(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: bnez a3, .LBB8_465
+; RV32-NEXT: j .LBB8_224
+; RV32-NEXT: .LBB8_465: # %cond.store604
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 27
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 635(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a3, a2, 20
+; RV32-NEXT: bltz a3, .LBB8_466
+; RV32-NEXT: j .LBB8_225
+; RV32-NEXT: .LBB8_466: # %cond.store607
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 508(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a3, a2, 19
+; RV32-NEXT: bltz a3, .LBB8_467
+; RV32-NEXT: j .LBB8_226
+; RV32-NEXT: .LBB8_467: # %cond.store610
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 13
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 381(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a3, a2, 18
+; RV32-NEXT: bltz a3, .LBB8_468
+; RV32-NEXT: j .LBB8_227
+; RV32-NEXT: .LBB8_468: # %cond.store613
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 254(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a3, a2, 17
+; RV32-NEXT: bltz a3, .LBB8_469
+; RV32-NEXT: j .LBB8_228
+; RV32-NEXT: .LBB8_469: # %cond.store616
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: li a5, 25
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a3, 127(a4)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a3, a2, 16
+; RV32-NEXT: bgez a3, .LBB8_524
+; RV32-NEXT: j .LBB8_229
+; RV32-NEXT: .LBB8_524: # %cond.store616
+; RV32-NEXT: j .LBB8_230
+; RV32-NEXT: .LBB8_470: # %cond.store622
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 3
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 2032(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 14
+; RV32-NEXT: bltz a4, .LBB8_471
+; RV32-NEXT: j .LBB8_232
+; RV32-NEXT: .LBB8_471: # %cond.store625
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1905(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 13
+; RV32-NEXT: bltz a4, .LBB8_472
+; RV32-NEXT: j .LBB8_233
+; RV32-NEXT: .LBB8_472: # %cond.store628
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 23
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1778(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 12
+; RV32-NEXT: bltz a4, .LBB8_473
+; RV32-NEXT: j .LBB8_234
+; RV32-NEXT: .LBB8_473: # %cond.store631
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1651(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 11
+; RV32-NEXT: bltz a4, .LBB8_474
+; RV32-NEXT: j .LBB8_235
+; RV32-NEXT: .LBB8_474: # %cond.store634
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 11
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1524(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 10
+; RV32-NEXT: bltz a4, .LBB8_475
+; RV32-NEXT: j .LBB8_236
+; RV32-NEXT: .LBB8_475: # %cond.store637
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1397(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 9
+; RV32-NEXT: bltz a4, .LBB8_476
+; RV32-NEXT: j .LBB8_237
+; RV32-NEXT: .LBB8_476: # %cond.store640
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 21
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1270(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 8
+; RV32-NEXT: bltz a4, .LBB8_477
+; RV32-NEXT: j .LBB8_238
+; RV32-NEXT: .LBB8_477: # %cond.store643
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1143(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 7
+; RV32-NEXT: bltz a4, .LBB8_478
+; RV32-NEXT: j .LBB8_239
+; RV32-NEXT: .LBB8_478: # %cond.store646
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 1016(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 6
+; RV32-NEXT: bltz a4, .LBB8_479
+; RV32-NEXT: j .LBB8_240
+; RV32-NEXT: .LBB8_479: # %cond.store649
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 889(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 5
+; RV32-NEXT: bltz a4, .LBB8_480
+; RV32-NEXT: j .LBB8_241
+; RV32-NEXT: .LBB8_480: # %cond.store652
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 19
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 762(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 4
+; RV32-NEXT: bltz a4, .LBB8_481
+; RV32-NEXT: j .LBB8_242
+; RV32-NEXT: .LBB8_481: # %cond.store655
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 635(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 3
+; RV32-NEXT: bltz a4, .LBB8_482
+; RV32-NEXT: j .LBB8_243
+; RV32-NEXT: .LBB8_482: # %cond.store658
+; RV32-NEXT: li a4, 128
+; RV32-NEXT: li a5, 9
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a5)
+; RV32-NEXT: lbu a4, 508(a3)
+; RV32-NEXT: addi a5, a0, 1
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: slli a4, a2, 2
+; RV32-NEXT: bgez a4, .LBB8_525
+; RV32-NEXT: j .LBB8_244
+; RV32-NEXT: .LBB8_525: # %cond.store658
+; RV32-NEXT: j .LBB8_245
+; RV32-NEXT: .LBB8_483: # %cond.store673
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1921
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 2032(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 4
+; RV32-NEXT: bnez a3, .LBB8_484
+; RV32-NEXT: j .LBB8_253
+; RV32-NEXT: .LBB8_484: # %cond.store676
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1793
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1905(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 8
+; RV32-NEXT: bnez a3, .LBB8_485
+; RV32-NEXT: j .LBB8_254
+; RV32-NEXT: .LBB8_485: # %cond.store679
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1665
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1778(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 16
+; RV32-NEXT: bnez a3, .LBB8_486
+; RV32-NEXT: j .LBB8_255
+; RV32-NEXT: .LBB8_486: # %cond.store682
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1537
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1651(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 32
+; RV32-NEXT: bnez a3, .LBB8_487
+; RV32-NEXT: j .LBB8_256
+; RV32-NEXT: .LBB8_487: # %cond.store685
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1409
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1524(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 64
+; RV32-NEXT: bnez a3, .LBB8_488
+; RV32-NEXT: j .LBB8_257
+; RV32-NEXT: .LBB8_488: # %cond.store688
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1281
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1397(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 128
+; RV32-NEXT: bnez a3, .LBB8_489
+; RV32-NEXT: j .LBB8_258
+; RV32-NEXT: .LBB8_489: # %cond.store691
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1153
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1270(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 256
+; RV32-NEXT: bnez a3, .LBB8_490
+; RV32-NEXT: j .LBB8_259
+; RV32-NEXT: .LBB8_490: # %cond.store694
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1025
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1143(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 512
+; RV32-NEXT: bnez a3, .LBB8_491
+; RV32-NEXT: j .LBB8_260
+; RV32-NEXT: .LBB8_491: # %cond.store697
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 897
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 1016(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: andi a3, a1, 1024
+; RV32-NEXT: bnez a3, .LBB8_492
+; RV32-NEXT: j .LBB8_261
+; RV32-NEXT: .LBB8_492: # %cond.store700
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 769
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 889(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 20
+; RV32-NEXT: bltz a3, .LBB8_493
+; RV32-NEXT: j .LBB8_262
+; RV32-NEXT: .LBB8_493: # %cond.store703
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 641
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 762(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 19
+; RV32-NEXT: bltz a3, .LBB8_494
+; RV32-NEXT: j .LBB8_263
+; RV32-NEXT: .LBB8_494: # %cond.store706
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 513
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 635(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 18
+; RV32-NEXT: bltz a3, .LBB8_495
+; RV32-NEXT: j .LBB8_264
+; RV32-NEXT: .LBB8_495: # %cond.store709
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 385
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 508(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 17
+; RV32-NEXT: bltz a3, .LBB8_496
+; RV32-NEXT: j .LBB8_265
+; RV32-NEXT: .LBB8_496: # %cond.store712
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 257
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 381(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 16
+; RV32-NEXT: bltz a3, .LBB8_497
+; RV32-NEXT: j .LBB8_266
+; RV32-NEXT: .LBB8_497: # %cond.store715
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 129
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 254(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 15
+; RV32-NEXT: bltz a3, .LBB8_498
+; RV32-NEXT: j .LBB8_267
+; RV32-NEXT: .LBB8_498: # %cond.store718
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a3, 127(a2)
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: slli a3, a1, 14
+; RV32-NEXT: bltz a3, .LBB8_499
+; RV32-NEXT: j .LBB8_268
+; RV32-NEXT: .LBB8_499: # %cond.store721
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: addi a4, sp, 1920
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a4)
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bltz a2, .LBB8_500
+; RV32-NEXT: j .LBB8_269
+; RV32-NEXT: .LBB8_500: # %cond.store724
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1792
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1906(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bltz a2, .LBB8_501
+; RV32-NEXT: j .LBB8_270
+; RV32-NEXT: .LBB8_501: # %cond.store727
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1664
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1779(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bltz a2, .LBB8_502
+; RV32-NEXT: j .LBB8_271
+; RV32-NEXT: .LBB8_502: # %cond.store730
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1536
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1652(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bltz a2, .LBB8_503
+; RV32-NEXT: j .LBB8_272
+; RV32-NEXT: .LBB8_503: # %cond.store733
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1408
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1525(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bltz a2, .LBB8_504
+; RV32-NEXT: j .LBB8_273
+; RV32-NEXT: .LBB8_504: # %cond.store736
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1280
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1398(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bltz a2, .LBB8_505
+; RV32-NEXT: j .LBB8_274
+; RV32-NEXT: .LBB8_505: # %cond.store739
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1152
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1271(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bltz a2, .LBB8_506
+; RV32-NEXT: j .LBB8_275
+; RV32-NEXT: .LBB8_506: # %cond.store742
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 1024
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1144(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bltz a2, .LBB8_507
+; RV32-NEXT: j .LBB8_276
+; RV32-NEXT: .LBB8_507: # %cond.store745
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 896
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 1017(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bltz a2, .LBB8_508
+; RV32-NEXT: j .LBB8_277
+; RV32-NEXT: .LBB8_508: # %cond.store748
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 768
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 890(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bltz a2, .LBB8_509
+; RV32-NEXT: j .LBB8_278
+; RV32-NEXT: .LBB8_509: # %cond.store751
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 640
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 763(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bltz a2, .LBB8_510
+; RV32-NEXT: j .LBB8_279
+; RV32-NEXT: .LBB8_510: # %cond.store754
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 512
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 636(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bltz a2, .LBB8_511
+; RV32-NEXT: j .LBB8_280
+; RV32-NEXT: .LBB8_511: # %cond.store757
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 384
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 509(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bltz a2, .LBB8_512
+; RV32-NEXT: j .LBB8_281
+; RV32-NEXT: .LBB8_512: # %cond.store760
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: addi a3, sp, 256
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v24, (a3)
+; RV32-NEXT: lbu a2, 382(sp)
+; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: bgez a1, .LBB8_526
+; RV32-NEXT: j .LBB8_282
+; RV32-NEXT: .LBB8_526: # %cond.store760
+; RV32-NEXT: j .LBB8_283
+entry:
+ tail call void @llvm.masked.compressstore.v256i8(<256 x i8> %data, ptr %p, <256 x i1> %mask)
+ ret void
+}
+
+; Compress + store for i16 type
+
+; Compress-store of a <1 x i16> vector to %p under a <1 x i1> mask.
+; The RV64 and RV32 expectations are identical: with VL=1 at e16/mf4,
+; vcompress.vm packs the mask-selected elements of v8 into v9, vcpop.m counts
+; the set bits of the mask in v0 into a1, and vse16.v then stores exactly that
+; many elements to (a0) after VL is reset to a1.
+define void @test_compresstore_i16_v1(ptr %p, <1 x i1> %mask, <1 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v1:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v1:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i16(<1 x i16> %data, ptr %p, <1 x i1> %mask)
+ ret void
+}
+
+; Compress-store of a <2 x i16> vector to %p under a <2 x i1> mask.
+; Both targets expect the same five-instruction lowering at e16/mf4 with an
+; initial VL of 2: vcompress.vm (v8 -> v9, mask v0), vcpop.m to obtain the
+; number of active elements in a1, then vse16.v of a1 elements to (a0).
+define void @test_compresstore_i16_v2(ptr %p, <2 x i1> %mask, <2 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v2:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v2:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i16(<2 x i16> %data, ptr %p, <2 x i1> %mask)
+ ret void
+}
+
+; Compress-store of a <4 x i16> vector to %p under a <4 x i1> mask.
+; Same lowering shape as the smaller i16 cases, but the expected vtype is
+; e16/mf2 with an initial VL of 4: vcompress.vm (v8 -> v9, mask v0), vcpop.m
+; into a1, then vse16.v of a1 elements to (a0). RV64 and RV32 output match.
+define void @test_compresstore_i16_v4(ptr %p, <4 x i1> %mask, <4 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v4:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v4:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i16(<4 x i16> %data, ptr %p, <4 x i1> %mask)
+ ret void
+}
+
+; Compress-store of an <8 x i16> vector to %p under an <8 x i1> mask.
+; Expected vtype is e16/m1 with an initial VL of 8. The sequence is
+; vcompress.vm (v8 -> v9, mask v0), vcpop.m into a1, then vse16.v of a1
+; elements to (a0); the RV64 and RV32 expectations are identical.
+define void @test_compresstore_i16_v8(ptr %p, <8 x i1> %mask, <8 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %p, <8 x i1> %mask)
+ ret void
+}
+
+; Compress-store of a <16 x i16> vector to %p under a <16 x i1> mask.
+; Expected vtype is e16/m2 with an initial VL of 16, so the compressed result
+; is placed in v10 rather than v9. The sequence is otherwise unchanged:
+; vcompress.vm (v8 -> v10, mask v0), vcpop.m into a1, vse16.v of a1 elements
+; to (a0). RV64 and RV32 expectations are identical.
+define void @test_compresstore_i16_v16(ptr %p, <16 x i1> %mask, <16 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT: vse16.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vse16.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %p, <16 x i1> %mask)
+ ret void
+}
+
+; Compress-store of a <32 x i16> vector to %p under a <32 x i1> mask.
+; Expected vtype is e16/m4 and the compressed result lands in v12. Unlike the
+; smaller cases, the initial VL of 32 is first loaded into a1 with li and set
+; via vsetvli (32 is outside the 0-31 immediate range of vsetivli). The rest is
+; the usual vcompress.vm / vcpop.m / vse16.v sequence, identical on both targets.
+define void @test_compresstore_i16_v32(ptr %p, <32 x i1> %mask, <32 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV64-NEXT: vse16.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; RV32-NEXT: vse16.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr %p, <32 x i1> %mask)
+ ret void
+}
+
+; Compress-store of a <64 x i16> vector to %p under a <64 x i1> mask.
+; Expected vtype is e16/m8 and the compressed result lands in v16. The initial
+; VL of 64 is loaded into a1 with li and set via vsetvli, then the usual
+; vcompress.vm / vcpop.m / vse16.v sequence follows; a1 is reused to hold the
+; active-element count. RV64 and RV32 expectations are identical.
+define void @test_compresstore_i16_v64(ptr %p, <64 x i1> %mask, <64 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i16_v64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v64i16(<64 x i16> %data, ptr %p, <64 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %data) {
+; RV64-LABEL: test_compresstore_i16_v128:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a2, v0
+; RV64-NEXT: andi a1, a2, 1
+; RV64-NEXT: bnez a1, .LBB16_137
+; RV64-NEXT: # %bb.1: # %else
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: bnez a1, .LBB16_138
+; RV64-NEXT: .LBB16_2: # %else2
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: bnez a1, .LBB16_139
+; RV64-NEXT: .LBB16_3: # %else5
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: bnez a1, .LBB16_140
+; RV64-NEXT: .LBB16_4: # %else8
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: bnez a1, .LBB16_141
+; RV64-NEXT: .LBB16_5: # %else11
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: bnez a1, .LBB16_142
+; RV64-NEXT: .LBB16_6: # %else14
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: bnez a1, .LBB16_143
+; RV64-NEXT: .LBB16_7: # %else17
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: bnez a1, .LBB16_144
+; RV64-NEXT: .LBB16_8: # %else20
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: bnez a1, .LBB16_145
+; RV64-NEXT: .LBB16_9: # %else23
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: bnez a1, .LBB16_146
+; RV64-NEXT: .LBB16_10: # %else26
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: bnez a1, .LBB16_147
+; RV64-NEXT: .LBB16_11: # %else29
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bltz a1, .LBB16_148
+; RV64-NEXT: .LBB16_12: # %else32
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bltz a1, .LBB16_149
+; RV64-NEXT: .LBB16_13: # %else35
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bltz a1, .LBB16_150
+; RV64-NEXT: .LBB16_14: # %else38
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bltz a1, .LBB16_151
+; RV64-NEXT: .LBB16_15: # %else41
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bgez a1, .LBB16_17
+; RV64-NEXT: .LBB16_16: # %cond.store43
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 15
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_17: # %else44
+; RV64-NEXT: addi sp, sp, -2032
+; RV64-NEXT: .cfi_def_cfa_offset 2032
+; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 2032
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: lui a1, 3
+; RV64-NEXT: addiw a1, a1, -1776
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: slli a3, a2, 47
+; RV64-NEXT: lui a1, 3
+; RV64-NEXT: addiw a1, a1, -1606
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bltz a3, .LBB16_152
+; RV64-NEXT: # %bb.18: # %else47
+; RV64-NEXT: slli a3, a2, 46
+; RV64-NEXT: bltz a3, .LBB16_153
+; RV64-NEXT: .LBB16_19: # %else50
+; RV64-NEXT: slli a3, a2, 45
+; RV64-NEXT: bltz a3, .LBB16_154
+; RV64-NEXT: .LBB16_20: # %else53
+; RV64-NEXT: slli a3, a2, 44
+; RV64-NEXT: bltz a3, .LBB16_155
+; RV64-NEXT: .LBB16_21: # %else56
+; RV64-NEXT: slli a3, a2, 43
+; RV64-NEXT: bltz a3, .LBB16_156
+; RV64-NEXT: .LBB16_22: # %else59
+; RV64-NEXT: slli a3, a2, 42
+; RV64-NEXT: bltz a3, .LBB16_157
+; RV64-NEXT: .LBB16_23: # %else62
+; RV64-NEXT: slli a3, a2, 41
+; RV64-NEXT: bltz a3, .LBB16_158
+; RV64-NEXT: .LBB16_24: # %else65
+; RV64-NEXT: slli a3, a2, 40
+; RV64-NEXT: bltz a3, .LBB16_159
+; RV64-NEXT: .LBB16_25: # %else68
+; RV64-NEXT: slli a3, a2, 39
+; RV64-NEXT: bltz a3, .LBB16_160
+; RV64-NEXT: .LBB16_26: # %else71
+; RV64-NEXT: slli a3, a2, 38
+; RV64-NEXT: bltz a3, .LBB16_161
+; RV64-NEXT: .LBB16_27: # %else74
+; RV64-NEXT: slli a3, a2, 37
+; RV64-NEXT: bltz a3, .LBB16_162
+; RV64-NEXT: .LBB16_28: # %else77
+; RV64-NEXT: slli a3, a2, 36
+; RV64-NEXT: bltz a3, .LBB16_163
+; RV64-NEXT: .LBB16_29: # %else80
+; RV64-NEXT: slli a3, a2, 35
+; RV64-NEXT: bltz a3, .LBB16_164
+; RV64-NEXT: .LBB16_30: # %else83
+; RV64-NEXT: slli a3, a2, 34
+; RV64-NEXT: bgez a3, .LBB16_32
+; RV64-NEXT: .LBB16_31: # %cond.store85
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 0(a1)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_32: # %else86
+; RV64-NEXT: slli a3, a2, 33
+; RV64-NEXT: lui a1, 2
+; RV64-NEXT: addiw a1, a1, 348
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bltz a3, .LBB16_165
+; RV64-NEXT: # %bb.33: # %else89
+; RV64-NEXT: slli a3, a2, 32
+; RV64-NEXT: bltz a3, .LBB16_166
+; RV64-NEXT: .LBB16_34: # %else92
+; RV64-NEXT: slli a3, a2, 31
+; RV64-NEXT: bltz a3, .LBB16_167
+; RV64-NEXT: .LBB16_35: # %else95
+; RV64-NEXT: slli a3, a2, 30
+; RV64-NEXT: bltz a3, .LBB16_168
+; RV64-NEXT: .LBB16_36: # %else98
+; RV64-NEXT: slli a3, a2, 29
+; RV64-NEXT: bltz a3, .LBB16_169
+; RV64-NEXT: .LBB16_37: # %else101
+; RV64-NEXT: slli a3, a2, 28
+; RV64-NEXT: bltz a3, .LBB16_170
+; RV64-NEXT: .LBB16_38: # %else104
+; RV64-NEXT: slli a3, a2, 27
+; RV64-NEXT: bltz a3, .LBB16_171
+; RV64-NEXT: .LBB16_39: # %else107
+; RV64-NEXT: slli a3, a2, 26
+; RV64-NEXT: bltz a3, .LBB16_172
+; RV64-NEXT: .LBB16_40: # %else110
+; RV64-NEXT: slli a3, a2, 25
+; RV64-NEXT: bltz a3, .LBB16_173
+; RV64-NEXT: .LBB16_41: # %else113
+; RV64-NEXT: slli a3, a2, 24
+; RV64-NEXT: bltz a3, .LBB16_174
+; RV64-NEXT: .LBB16_42: # %else116
+; RV64-NEXT: slli a3, a2, 23
+; RV64-NEXT: bltz a3, .LBB16_175
+; RV64-NEXT: .LBB16_43: # %else119
+; RV64-NEXT: slli a3, a2, 22
+; RV64-NEXT: bltz a3, .LBB16_176
+; RV64-NEXT: .LBB16_44: # %else122
+; RV64-NEXT: slli a3, a2, 21
+; RV64-NEXT: bltz a3, .LBB16_177
+; RV64-NEXT: .LBB16_45: # %else125
+; RV64-NEXT: slli a3, a2, 20
+; RV64-NEXT: bltz a3, .LBB16_178
+; RV64-NEXT: .LBB16_46: # %else128
+; RV64-NEXT: slli a3, a2, 19
+; RV64-NEXT: bltz a3, .LBB16_179
+; RV64-NEXT: .LBB16_47: # %else131
+; RV64-NEXT: slli a3, a2, 18
+; RV64-NEXT: bltz a3, .LBB16_180
+; RV64-NEXT: .LBB16_48: # %else134
+; RV64-NEXT: slli a3, a2, 17
+; RV64-NEXT: bgez a3, .LBB16_50
+; RV64-NEXT: .LBB16_49: # %cond.store136
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 0(a1)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_50: # %else137
+; RV64-NEXT: slli a1, a2, 16
+; RV64-NEXT: lui a3, 2
+; RV64-NEXT: addiw a3, a3, -1794
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: bltz a1, .LBB16_181
+; RV64-NEXT: # %bb.51: # %else140
+; RV64-NEXT: slli a1, a2, 15
+; RV64-NEXT: bltz a1, .LBB16_182
+; RV64-NEXT: .LBB16_52: # %else143
+; RV64-NEXT: slli a1, a2, 14
+; RV64-NEXT: bltz a1, .LBB16_183
+; RV64-NEXT: .LBB16_53: # %else146
+; RV64-NEXT: slli a1, a2, 13
+; RV64-NEXT: bltz a1, .LBB16_184
+; RV64-NEXT: .LBB16_54: # %else149
+; RV64-NEXT: slli a1, a2, 12
+; RV64-NEXT: bltz a1, .LBB16_185
+; RV64-NEXT: .LBB16_55: # %else152
+; RV64-NEXT: slli a1, a2, 11
+; RV64-NEXT: bltz a1, .LBB16_186
+; RV64-NEXT: .LBB16_56: # %else155
+; RV64-NEXT: slli a1, a2, 10
+; RV64-NEXT: bltz a1, .LBB16_187
+; RV64-NEXT: .LBB16_57: # %else158
+; RV64-NEXT: slli a1, a2, 9
+; RV64-NEXT: bltz a1, .LBB16_188
+; RV64-NEXT: .LBB16_58: # %else161
+; RV64-NEXT: slli a1, a2, 8
+; RV64-NEXT: bltz a1, .LBB16_189
+; RV64-NEXT: .LBB16_59: # %else164
+; RV64-NEXT: slli a1, a2, 7
+; RV64-NEXT: bltz a1, .LBB16_190
+; RV64-NEXT: .LBB16_60: # %else167
+; RV64-NEXT: slli a1, a2, 6
+; RV64-NEXT: bltz a1, .LBB16_191
+; RV64-NEXT: .LBB16_61: # %else170
+; RV64-NEXT: slli a1, a2, 5
+; RV64-NEXT: bltz a1, .LBB16_192
+; RV64-NEXT: .LBB16_62: # %else173
+; RV64-NEXT: slli a1, a2, 4
+; RV64-NEXT: bltz a1, .LBB16_193
+; RV64-NEXT: .LBB16_63: # %else176
+; RV64-NEXT: slli a1, a2, 3
+; RV64-NEXT: bltz a1, .LBB16_194
+; RV64-NEXT: .LBB16_64: # %else179
+; RV64-NEXT: slli a1, a2, 2
+; RV64-NEXT: bgez a1, .LBB16_66
+; RV64-NEXT: .LBB16_65: # %cond.store181
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 252(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_66: # %else182
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: slli a1, a2, 1
+; RV64-NEXT: vslidedown.vi v24, v0, 1
+; RV64-NEXT: bgez a1, .LBB16_68
+; RV64-NEXT: # %bb.67: # %cond.store184
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 25
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 126(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_68: # %else185
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: bltz a2, .LBB16_195
+; RV64-NEXT: # %bb.69: # %else188
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: bnez a2, .LBB16_196
+; RV64-NEXT: .LBB16_70: # %else191
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: bnez a2, .LBB16_197
+; RV64-NEXT: .LBB16_71: # %else194
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: bnez a2, .LBB16_198
+; RV64-NEXT: .LBB16_72: # %else197
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: bnez a2, .LBB16_199
+; RV64-NEXT: .LBB16_73: # %else200
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: bnez a2, .LBB16_200
+; RV64-NEXT: .LBB16_74: # %else203
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: bnez a2, .LBB16_201
+; RV64-NEXT: .LBB16_75: # %else206
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: bnez a2, .LBB16_202
+; RV64-NEXT: .LBB16_76: # %else209
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: bnez a2, .LBB16_203
+; RV64-NEXT: .LBB16_77: # %else212
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: bnez a2, .LBB16_204
+; RV64-NEXT: .LBB16_78: # %else215
+; RV64-NEXT: andi a2, a1, 512
+; RV64-NEXT: bnez a2, .LBB16_205
+; RV64-NEXT: .LBB16_79: # %else218
+; RV64-NEXT: andi a2, a1, 1024
+; RV64-NEXT: bnez a2, .LBB16_206
+; RV64-NEXT: .LBB16_80: # %else221
+; RV64-NEXT: slli a2, a1, 52
+; RV64-NEXT: bltz a2, .LBB16_207
+; RV64-NEXT: .LBB16_81: # %else224
+; RV64-NEXT: slli a2, a1, 51
+; RV64-NEXT: bltz a2, .LBB16_208
+; RV64-NEXT: .LBB16_82: # %else227
+; RV64-NEXT: slli a2, a1, 50
+; RV64-NEXT: bltz a2, .LBB16_209
+; RV64-NEXT: .LBB16_83: # %else230
+; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: bltz a2, .LBB16_210
+; RV64-NEXT: .LBB16_84: # %else233
+; RV64-NEXT: slli a2, a1, 48
+; RV64-NEXT: bgez a2, .LBB16_86
+; RV64-NEXT: .LBB16_85: # %cond.store235
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 15
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_86: # %else236
+; RV64-NEXT: slli a3, a1, 47
+; RV64-NEXT: lui a2, 1
+; RV64-NEXT: addiw a2, a2, 64
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: bltz a3, .LBB16_211
+; RV64-NEXT: # %bb.87: # %else239
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bltz a3, .LBB16_212
+; RV64-NEXT: .LBB16_88: # %else242
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bgez a3, .LBB16_89
+; RV64-NEXT: j .LBB16_213
+; RV64-NEXT: .LBB16_89: # %else245
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bgez a3, .LBB16_90
+; RV64-NEXT: j .LBB16_214
+; RV64-NEXT: .LBB16_90: # %else248
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bgez a3, .LBB16_91
+; RV64-NEXT: j .LBB16_215
+; RV64-NEXT: .LBB16_91: # %else251
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bgez a3, .LBB16_92
+; RV64-NEXT: j .LBB16_216
+; RV64-NEXT: .LBB16_92: # %else254
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bgez a3, .LBB16_93
+; RV64-NEXT: j .LBB16_217
+; RV64-NEXT: .LBB16_93: # %else257
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bgez a3, .LBB16_94
+; RV64-NEXT: j .LBB16_218
+; RV64-NEXT: .LBB16_94: # %else260
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bgez a3, .LBB16_95
+; RV64-NEXT: j .LBB16_219
+; RV64-NEXT: .LBB16_95: # %else263
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bgez a3, .LBB16_96
+; RV64-NEXT: j .LBB16_220
+; RV64-NEXT: .LBB16_96: # %else266
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: bgez a3, .LBB16_97
+; RV64-NEXT: j .LBB16_221
+; RV64-NEXT: .LBB16_97: # %else269
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bgez a3, .LBB16_98
+; RV64-NEXT: j .LBB16_222
+; RV64-NEXT: .LBB16_98: # %else272
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: bgez a3, .LBB16_99
+; RV64-NEXT: j .LBB16_223
+; RV64-NEXT: .LBB16_99: # %else275
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bgez a3, .LBB16_100
+; RV64-NEXT: j .LBB16_224
+; RV64-NEXT: .LBB16_100: # %else278
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bgez a3, .LBB16_101
+; RV64-NEXT: j .LBB16_225
+; RV64-NEXT: .LBB16_101: # %else281
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bgez a3, .LBB16_102
+; RV64-NEXT: j .LBB16_226
+; RV64-NEXT: .LBB16_102: # %else284
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bgez a3, .LBB16_104
+; RV64-NEXT: .LBB16_103: # %cond.store286
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a2, 0(a2)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: .LBB16_104: # %else287
+; RV64-NEXT: slli a3, a1, 30
+; RV64-NEXT: addi a2, sp, 2018
+; RV64-NEXT: bgez a3, .LBB16_105
+; RV64-NEXT: j .LBB16_227
+; RV64-NEXT: .LBB16_105: # %else290
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bgez a3, .LBB16_106
+; RV64-NEXT: j .LBB16_228
+; RV64-NEXT: .LBB16_106: # %else293
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bgez a3, .LBB16_107
+; RV64-NEXT: j .LBB16_229
+; RV64-NEXT: .LBB16_107: # %else296
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bgez a3, .LBB16_108
+; RV64-NEXT: j .LBB16_230
+; RV64-NEXT: .LBB16_108: # %else299
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bgez a3, .LBB16_109
+; RV64-NEXT: j .LBB16_231
+; RV64-NEXT: .LBB16_109: # %else302
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bgez a3, .LBB16_110
+; RV64-NEXT: j .LBB16_232
+; RV64-NEXT: .LBB16_110: # %else305
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bgez a3, .LBB16_111
+; RV64-NEXT: j .LBB16_233
+; RV64-NEXT: .LBB16_111: # %else308
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bgez a3, .LBB16_112
+; RV64-NEXT: j .LBB16_234
+; RV64-NEXT: .LBB16_112: # %else311
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bgez a3, .LBB16_113
+; RV64-NEXT: j .LBB16_235
+; RV64-NEXT: .LBB16_113: # %else314
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bgez a3, .LBB16_114
+; RV64-NEXT: j .LBB16_236
+; RV64-NEXT: .LBB16_114: # %else317
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bgez a3, .LBB16_115
+; RV64-NEXT: j .LBB16_237
+; RV64-NEXT: .LBB16_115: # %else320
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bgez a3, .LBB16_116
+; RV64-NEXT: j .LBB16_238
+; RV64-NEXT: .LBB16_116: # %else323
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: bgez a3, .LBB16_117
+; RV64-NEXT: j .LBB16_239
+; RV64-NEXT: .LBB16_117: # %else326
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bgez a3, .LBB16_118
+; RV64-NEXT: j .LBB16_240
+; RV64-NEXT: .LBB16_118: # %else329
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bgez a3, .LBB16_119
+; RV64-NEXT: j .LBB16_241
+; RV64-NEXT: .LBB16_119: # %else332
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bgez a3, .LBB16_120
+; RV64-NEXT: j .LBB16_242
+; RV64-NEXT: .LBB16_120: # %else335
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bgez a3, .LBB16_121
+; RV64-NEXT: j .LBB16_243
+; RV64-NEXT: .LBB16_121: # %else338
+; RV64-NEXT: slli a2, a1, 13
+; RV64-NEXT: bgez a2, .LBB16_122
+; RV64-NEXT: j .LBB16_244
+; RV64-NEXT: .LBB16_122: # %else341
+; RV64-NEXT: slli a2, a1, 12
+; RV64-NEXT: bgez a2, .LBB16_123
+; RV64-NEXT: j .LBB16_245
+; RV64-NEXT: .LBB16_123: # %else344
+; RV64-NEXT: slli a2, a1, 11
+; RV64-NEXT: bgez a2, .LBB16_124
+; RV64-NEXT: j .LBB16_246
+; RV64-NEXT: .LBB16_124: # %else347
+; RV64-NEXT: slli a2, a1, 10
+; RV64-NEXT: bgez a2, .LBB16_125
+; RV64-NEXT: j .LBB16_247
+; RV64-NEXT: .LBB16_125: # %else350
+; RV64-NEXT: slli a2, a1, 9
+; RV64-NEXT: bgez a2, .LBB16_126
+; RV64-NEXT: j .LBB16_248
+; RV64-NEXT: .LBB16_126: # %else353
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: bgez a2, .LBB16_127
+; RV64-NEXT: j .LBB16_249
+; RV64-NEXT: .LBB16_127: # %else356
+; RV64-NEXT: slli a2, a1, 7
+; RV64-NEXT: bgez a2, .LBB16_128
+; RV64-NEXT: j .LBB16_250
+; RV64-NEXT: .LBB16_128: # %else359
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: bgez a2, .LBB16_129
+; RV64-NEXT: j .LBB16_251
+; RV64-NEXT: .LBB16_129: # %else362
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: bgez a2, .LBB16_130
+; RV64-NEXT: j .LBB16_252
+; RV64-NEXT: .LBB16_130: # %else365
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: bgez a2, .LBB16_131
+; RV64-NEXT: j .LBB16_253
+; RV64-NEXT: .LBB16_131: # %else368
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: bgez a2, .LBB16_132
+; RV64-NEXT: j .LBB16_254
+; RV64-NEXT: .LBB16_132: # %else371
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: bgez a2, .LBB16_133
+; RV64-NEXT: j .LBB16_255
+; RV64-NEXT: .LBB16_133: # %else374
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: bgez a2, .LBB16_134
+; RV64-NEXT: j .LBB16_256
+; RV64-NEXT: .LBB16_134: # %else377
+; RV64-NEXT: bgez a1, .LBB16_136
+; RV64-NEXT: .LBB16_135: # %cond.store379
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: addi a2, sp, 128
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a2)
+; RV64-NEXT: lh a1, 254(sp)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: .LBB16_136: # %else380
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 256
+; RV64-NEXT: sub sp, s0, a0
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, -1776
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 2032
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB16_137: # %cond.store
+; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: beqz a1, .LBB16_2
+; RV64-NEXT: .LBB16_138: # %cond.store1
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 1
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: beqz a1, .LBB16_3
+; RV64-NEXT: .LBB16_139: # %cond.store4
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 2
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: beqz a1, .LBB16_4
+; RV64-NEXT: .LBB16_140: # %cond.store7
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 3
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: beqz a1, .LBB16_5
+; RV64-NEXT: .LBB16_141: # %cond.store10
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 4
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: beqz a1, .LBB16_6
+; RV64-NEXT: .LBB16_142: # %cond.store13
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 5
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: beqz a1, .LBB16_7
+; RV64-NEXT: .LBB16_143: # %cond.store16
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 6
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: beqz a1, .LBB16_8
+; RV64-NEXT: .LBB16_144: # %cond.store19
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 7
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: beqz a1, .LBB16_9
+; RV64-NEXT: .LBB16_145: # %cond.store22
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 8
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: beqz a1, .LBB16_10
+; RV64-NEXT: .LBB16_146: # %cond.store25
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 9
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: beqz a1, .LBB16_11
+; RV64-NEXT: .LBB16_147: # %cond.store28
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 10
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bgez a1, .LBB16_12
+; RV64-NEXT: .LBB16_148: # %cond.store31
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 11
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bgez a1, .LBB16_13
+; RV64-NEXT: .LBB16_149: # %cond.store34
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 12
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bgez a1, .LBB16_14
+; RV64-NEXT: .LBB16_150: # %cond.store37
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 13
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bgez a1, .LBB16_15
+; RV64-NEXT: .LBB16_151: # %cond.store40
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 14
+; RV64-NEXT: vmv.x.s a1, v24
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bltz a1, .LBB16_16
+; RV64-NEXT: j .LBB16_17
+; RV64-NEXT: .LBB16_152: # %cond.store46
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1638(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 46
+; RV64-NEXT: bgez a3, .LBB16_19
+; RV64-NEXT: .LBB16_153: # %cond.store49
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1512(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 45
+; RV64-NEXT: bgez a3, .LBB16_20
+; RV64-NEXT: .LBB16_154: # %cond.store52
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -256
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1386(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 44
+; RV64-NEXT: bgez a3, .LBB16_21
+; RV64-NEXT: .LBB16_155: # %cond.store55
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1260(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 43
+; RV64-NEXT: bgez a3, .LBB16_22
+; RV64-NEXT: .LBB16_156: # %cond.store58
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1134(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 42
+; RV64-NEXT: bgez a3, .LBB16_23
+; RV64-NEXT: .LBB16_157: # %cond.store61
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1008(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 41
+; RV64-NEXT: bgez a3, .LBB16_24
+; RV64-NEXT: .LBB16_158: # %cond.store64
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 882(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 40
+; RV64-NEXT: bgez a3, .LBB16_25
+; RV64-NEXT: .LBB16_159: # %cond.store67
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 756(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 39
+; RV64-NEXT: bgez a3, .LBB16_26
+; RV64-NEXT: .LBB16_160: # %cond.store70
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 630(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 38
+; RV64-NEXT: bgez a3, .LBB16_27
+; RV64-NEXT: .LBB16_161: # %cond.store73
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 504(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 37
+; RV64-NEXT: bgez a3, .LBB16_28
+; RV64-NEXT: .LBB16_162: # %cond.store76
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 378(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 36
+; RV64-NEXT: bgez a3, .LBB16_29
+; RV64-NEXT: .LBB16_163: # %cond.store79
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 252(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 35
+; RV64-NEXT: bgez a3, .LBB16_30
+; RV64-NEXT: .LBB16_164: # %cond.store82
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 21
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 126(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 34
+; RV64-NEXT: bltz a3, .LBB16_31
+; RV64-NEXT: j .LBB16_32
+; RV64-NEXT: .LBB16_165: # %cond.store88
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 2016(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 32
+; RV64-NEXT: bgez a3, .LBB16_34
+; RV64-NEXT: .LBB16_166: # %cond.store91
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 3
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1890(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 31
+; RV64-NEXT: bgez a3, .LBB16_35
+; RV64-NEXT: .LBB16_167: # %cond.store94
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 5
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1764(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 30
+; RV64-NEXT: bgez a3, .LBB16_36
+; RV64-NEXT: .LBB16_168: # %cond.store97
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1638(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 29
+; RV64-NEXT: bgez a3, .LBB16_37
+; RV64-NEXT: .LBB16_169: # %cond.store100
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1792
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1512(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 28
+; RV64-NEXT: bgez a3, .LBB16_38
+; RV64-NEXT: .LBB16_170: # %cond.store103
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1386(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 27
+; RV64-NEXT: bgez a3, .LBB16_39
+; RV64-NEXT: .LBB16_171: # %cond.store106
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 19
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1260(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 26
+; RV64-NEXT: bgez a3, .LBB16_40
+; RV64-NEXT: .LBB16_172: # %cond.store109
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1134(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 25
+; RV64-NEXT: bgez a3, .LBB16_41
+; RV64-NEXT: .LBB16_173: # %cond.store112
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1280
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 1008(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 24
+; RV64-NEXT: bgez a3, .LBB16_42
+; RV64-NEXT: .LBB16_174: # %cond.store115
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 882(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 23
+; RV64-NEXT: bgez a3, .LBB16_43
+; RV64-NEXT: .LBB16_175: # %cond.store118
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 9
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 756(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 22
+; RV64-NEXT: bgez a3, .LBB16_44
+; RV64-NEXT: .LBB16_176: # %cond.store121
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 630(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 21
+; RV64-NEXT: bgez a3, .LBB16_45
+; RV64-NEXT: .LBB16_177: # %cond.store124
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 768
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 504(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 20
+; RV64-NEXT: bgez a3, .LBB16_46
+; RV64-NEXT: .LBB16_178: # %cond.store127
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 378(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 19
+; RV64-NEXT: bgez a3, .LBB16_47
+; RV64-NEXT: .LBB16_179: # %cond.store130
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 17
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 252(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 18
+; RV64-NEXT: bgez a3, .LBB16_48
+; RV64-NEXT: .LBB16_180: # %cond.store133
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a3, 126(a1)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a2, 17
+; RV64-NEXT: bltz a3, .LBB16_49
+; RV64-NEXT: j .LBB16_50
+; RV64-NEXT: .LBB16_181: # %cond.store139
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 2016(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 15
+; RV64-NEXT: bgez a1, .LBB16_52
+; RV64-NEXT: .LBB16_182: # %cond.store142
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1890(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 14
+; RV64-NEXT: bgez a1, .LBB16_53
+; RV64-NEXT: .LBB16_183: # %cond.store145
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1764(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 13
+; RV64-NEXT: bgez a1, .LBB16_54
+; RV64-NEXT: .LBB16_184: # %cond.store148
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 31
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1638(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 12
+; RV64-NEXT: bgez a1, .LBB16_55
+; RV64-NEXT: .LBB16_185: # %cond.store151
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1512(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 11
+; RV64-NEXT: bgez a1, .LBB16_56
+; RV64-NEXT: .LBB16_186: # %cond.store154
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 15
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1386(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 10
+; RV64-NEXT: bgez a1, .LBB16_57
+; RV64-NEXT: .LBB16_187: # %cond.store157
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1260(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 9
+; RV64-NEXT: bgez a1, .LBB16_58
+; RV64-NEXT: .LBB16_188: # %cond.store160
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 29
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1134(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 8
+; RV64-NEXT: bgez a1, .LBB16_59
+; RV64-NEXT: .LBB16_189: # %cond.store163
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 1008(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 7
+; RV64-NEXT: bgez a1, .LBB16_60
+; RV64-NEXT: .LBB16_190: # %cond.store166
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 7
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 882(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 6
+; RV64-NEXT: bgez a1, .LBB16_61
+; RV64-NEXT: .LBB16_191: # %cond.store169
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 756(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 5
+; RV64-NEXT: bgez a1, .LBB16_62
+; RV64-NEXT: .LBB16_192: # %cond.store172
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 27
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 630(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 4
+; RV64-NEXT: bgez a1, .LBB16_63
+; RV64-NEXT: .LBB16_193: # %cond.store175
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 504(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 3
+; RV64-NEXT: bgez a1, .LBB16_64
+; RV64-NEXT: .LBB16_194: # %cond.store178
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: li a4, 13
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a1, 378(a3)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a1, a2, 2
+; RV64-NEXT: bltz a1, .LBB16_65
+; RV64-NEXT: j .LBB16_66
+; RV64-NEXT: .LBB16_195: # %cond.store187
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: lui a4, 2
+; RV64-NEXT: addiw a4, a4, -1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v8, (a4)
+; RV64-NEXT: lh a2, 0(a3)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: beqz a2, .LBB16_70
+; RV64-NEXT: .LBB16_196: # %cond.store190
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vmv.x.s a2, v16
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: beqz a2, .LBB16_71
+; RV64-NEXT: .LBB16_197: # %cond.store193
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 1
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: beqz a2, .LBB16_72
+; RV64-NEXT: .LBB16_198: # %cond.store196
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 2
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: beqz a2, .LBB16_73
+; RV64-NEXT: .LBB16_199: # %cond.store199
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 3
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: beqz a2, .LBB16_74
+; RV64-NEXT: .LBB16_200: # %cond.store202
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 4
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: beqz a2, .LBB16_75
+; RV64-NEXT: .LBB16_201: # %cond.store205
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 5
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: beqz a2, .LBB16_76
+; RV64-NEXT: .LBB16_202: # %cond.store208
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 6
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: beqz a2, .LBB16_77
+; RV64-NEXT: .LBB16_203: # %cond.store211
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 7
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: beqz a2, .LBB16_78
+; RV64-NEXT: .LBB16_204: # %cond.store214
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 8
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 512
+; RV64-NEXT: beqz a2, .LBB16_79
+; RV64-NEXT: .LBB16_205: # %cond.store217
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 9
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: andi a2, a1, 1024
+; RV64-NEXT: beqz a2, .LBB16_80
+; RV64-NEXT: .LBB16_206: # %cond.store220
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 10
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 52
+; RV64-NEXT: bgez a2, .LBB16_81
+; RV64-NEXT: .LBB16_207: # %cond.store223
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 11
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 51
+; RV64-NEXT: bgez a2, .LBB16_82
+; RV64-NEXT: .LBB16_208: # %cond.store226
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 12
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 50
+; RV64-NEXT: bgez a2, .LBB16_83
+; RV64-NEXT: .LBB16_209: # %cond.store229
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 13
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: bgez a2, .LBB16_84
+; RV64-NEXT: .LBB16_210: # %cond.store232
+; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 14
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 48
+; RV64-NEXT: bltz a2, .LBB16_85
+; RV64-NEXT: j .LBB16_86
+; RV64-NEXT: .LBB16_211: # %cond.store238
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 3
+; RV64-NEXT: slli a4, a4, 11
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 2016(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bgez a3, .LBB16_88
+; RV64-NEXT: .LBB16_212: # %cond.store241
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1890(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bltz a3, .LBB16_213
+; RV64-NEXT: j .LBB16_89
+; RV64-NEXT: .LBB16_213: # %cond.store244
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1764(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bltz a3, .LBB16_214
+; RV64-NEXT: j .LBB16_90
+; RV64-NEXT: .LBB16_214: # %cond.store247
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1638(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bltz a3, .LBB16_215
+; RV64-NEXT: j .LBB16_91
+; RV64-NEXT: .LBB16_215: # %cond.store250
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1512(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bltz a3, .LBB16_216
+; RV64-NEXT: j .LBB16_92
+; RV64-NEXT: .LBB16_216: # %cond.store253
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1386(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bltz a3, .LBB16_217
+; RV64-NEXT: j .LBB16_93
+; RV64-NEXT: .LBB16_217: # %cond.store256
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 21
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1260(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bltz a3, .LBB16_218
+; RV64-NEXT: j .LBB16_94
+; RV64-NEXT: .LBB16_218: # %cond.store259
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1134(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bltz a3, .LBB16_219
+; RV64-NEXT: j .LBB16_95
+; RV64-NEXT: .LBB16_219: # %cond.store262
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 5
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1008(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bltz a3, .LBB16_220
+; RV64-NEXT: j .LBB16_96
+; RV64-NEXT: .LBB16_220: # %cond.store265
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 882(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: bltz a3, .LBB16_221
+; RV64-NEXT: j .LBB16_97
+; RV64-NEXT: .LBB16_221: # %cond.store268
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 19
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 756(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bltz a3, .LBB16_222
+; RV64-NEXT: j .LBB16_98
+; RV64-NEXT: .LBB16_222: # %cond.store271
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 630(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: bltz a3, .LBB16_223
+; RV64-NEXT: j .LBB16_99
+; RV64-NEXT: .LBB16_223: # %cond.store274
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 9
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 504(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bltz a3, .LBB16_224
+; RV64-NEXT: j .LBB16_100
+; RV64-NEXT: .LBB16_224: # %cond.store277
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 378(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bltz a3, .LBB16_225
+; RV64-NEXT: j .LBB16_101
+; RV64-NEXT: .LBB16_225: # %cond.store280
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: li a4, 17
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 252(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bltz a3, .LBB16_226
+; RV64-NEXT: j .LBB16_102
+; RV64-NEXT: .LBB16_226: # %cond.store283
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 126(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bgez a3, .LBB16_257
+; RV64-NEXT: j .LBB16_103
+; RV64-NEXT: .LBB16_257: # %cond.store283
+; RV64-NEXT: j .LBB16_104
+; RV64-NEXT: .LBB16_227: # %cond.store289
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1921
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 2016(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bltz a3, .LBB16_228
+; RV64-NEXT: j .LBB16_106
+; RV64-NEXT: .LBB16_228: # %cond.store292
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1793
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1890(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bltz a3, .LBB16_229
+; RV64-NEXT: j .LBB16_107
+; RV64-NEXT: .LBB16_229: # %cond.store295
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1665
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1764(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bltz a3, .LBB16_230
+; RV64-NEXT: j .LBB16_108
+; RV64-NEXT: .LBB16_230: # %cond.store298
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1537
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1638(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bltz a3, .LBB16_231
+; RV64-NEXT: j .LBB16_109
+; RV64-NEXT: .LBB16_231: # %cond.store301
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1409
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1512(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bltz a3, .LBB16_232
+; RV64-NEXT: j .LBB16_110
+; RV64-NEXT: .LBB16_232: # %cond.store304
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1281
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1386(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bltz a3, .LBB16_233
+; RV64-NEXT: j .LBB16_111
+; RV64-NEXT: .LBB16_233: # %cond.store307
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1153
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1260(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bltz a3, .LBB16_234
+; RV64-NEXT: j .LBB16_112
+; RV64-NEXT: .LBB16_234: # %cond.store310
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1025
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1134(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bltz a3, .LBB16_235
+; RV64-NEXT: j .LBB16_113
+; RV64-NEXT: .LBB16_235: # %cond.store313
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 897
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 1008(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bltz a3, .LBB16_236
+; RV64-NEXT: j .LBB16_114
+; RV64-NEXT: .LBB16_236: # %cond.store316
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 769
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 882(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bltz a3, .LBB16_237
+; RV64-NEXT: j .LBB16_115
+; RV64-NEXT: .LBB16_237: # %cond.store319
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 641
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 756(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bltz a3, .LBB16_238
+; RV64-NEXT: j .LBB16_116
+; RV64-NEXT: .LBB16_238: # %cond.store322
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 513
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 630(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: bltz a3, .LBB16_239
+; RV64-NEXT: j .LBB16_117
+; RV64-NEXT: .LBB16_239: # %cond.store325
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 385
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 504(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bltz a3, .LBB16_240
+; RV64-NEXT: j .LBB16_118
+; RV64-NEXT: .LBB16_240: # %cond.store328
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 257
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 378(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bltz a3, .LBB16_241
+; RV64-NEXT: j .LBB16_119
+; RV64-NEXT: .LBB16_241: # %cond.store331
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 129
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 252(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bltz a3, .LBB16_242
+; RV64-NEXT: j .LBB16_120
+; RV64-NEXT: .LBB16_242: # %cond.store334
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a3, 126(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bltz a3, .LBB16_243
+; RV64-NEXT: j .LBB16_121
+; RV64-NEXT: .LBB16_243: # %cond.store337
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a4, sp, 1920
+; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a4)
+; RV64-NEXT: lh a2, 0(a2)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 13
+; RV64-NEXT: bltz a2, .LBB16_244
+; RV64-NEXT: j .LBB16_122
+; RV64-NEXT: .LBB16_244: # %cond.store340
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1792
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1892(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 12
+; RV64-NEXT: bltz a2, .LBB16_245
+; RV64-NEXT: j .LBB16_123
+; RV64-NEXT: .LBB16_245: # %cond.store343
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1664
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1766(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 11
+; RV64-NEXT: bltz a2, .LBB16_246
+; RV64-NEXT: j .LBB16_124
+; RV64-NEXT: .LBB16_246: # %cond.store346
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1536
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1640(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 10
+; RV64-NEXT: bltz a2, .LBB16_247
+; RV64-NEXT: j .LBB16_125
+; RV64-NEXT: .LBB16_247: # %cond.store349
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1408
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1514(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 9
+; RV64-NEXT: bltz a2, .LBB16_248
+; RV64-NEXT: j .LBB16_126
+; RV64-NEXT: .LBB16_248: # %cond.store352
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1280
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1388(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: bltz a2, .LBB16_249
+; RV64-NEXT: j .LBB16_127
+; RV64-NEXT: .LBB16_249: # %cond.store355
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1152
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1262(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 7
+; RV64-NEXT: bltz a2, .LBB16_250
+; RV64-NEXT: j .LBB16_128
+; RV64-NEXT: .LBB16_250: # %cond.store358
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 1024
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1136(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: bltz a2, .LBB16_251
+; RV64-NEXT: j .LBB16_129
+; RV64-NEXT: .LBB16_251: # %cond.store361
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 896
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 1010(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: bltz a2, .LBB16_252
+; RV64-NEXT: j .LBB16_130
+; RV64-NEXT: .LBB16_252: # %cond.store364
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 768
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 884(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: bltz a2, .LBB16_253
+; RV64-NEXT: j .LBB16_131
+; RV64-NEXT: .LBB16_253: # %cond.store367
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 640
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 758(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: bltz a2, .LBB16_254
+; RV64-NEXT: j .LBB16_132
+; RV64-NEXT: .LBB16_254: # %cond.store370
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 512
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 632(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: bltz a2, .LBB16_255
+; RV64-NEXT: j .LBB16_133
+; RV64-NEXT: .LBB16_255: # %cond.store373
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 384
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 506(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: bltz a2, .LBB16_256
+; RV64-NEXT: j .LBB16_134
+; RV64-NEXT: .LBB16_256: # %cond.store376
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: addi a3, sp, 256
+; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT: vse16.v v16, (a3)
+; RV64-NEXT: lh a2, 380(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: bgez a1, .LBB16_258
+; RV64-NEXT: j .LBB16_135
+; RV64-NEXT: .LBB16_258: # %cond.store376
+; RV64-NEXT: j .LBB16_136
+;
+; RV32-LABEL: test_compresstore_i16_v128:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a3, v0
+; RV32-NEXT: andi a1, a3, 1
+; RV32-NEXT: bnez a1, .LBB16_142
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a1, a3, 2
+; RV32-NEXT: bnez a1, .LBB16_143
+; RV32-NEXT: .LBB16_2: # %else2
+; RV32-NEXT: andi a1, a3, 4
+; RV32-NEXT: bnez a1, .LBB16_144
+; RV32-NEXT: .LBB16_3: # %else5
+; RV32-NEXT: andi a1, a3, 8
+; RV32-NEXT: bnez a1, .LBB16_145
+; RV32-NEXT: .LBB16_4: # %else8
+; RV32-NEXT: andi a1, a3, 16
+; RV32-NEXT: bnez a1, .LBB16_146
+; RV32-NEXT: .LBB16_5: # %else11
+; RV32-NEXT: andi a1, a3, 32
+; RV32-NEXT: bnez a1, .LBB16_147
+; RV32-NEXT: .LBB16_6: # %else14
+; RV32-NEXT: andi a1, a3, 64
+; RV32-NEXT: bnez a1, .LBB16_148
+; RV32-NEXT: .LBB16_7: # %else17
+; RV32-NEXT: andi a1, a3, 128
+; RV32-NEXT: bnez a1, .LBB16_149
+; RV32-NEXT: .LBB16_8: # %else20
+; RV32-NEXT: andi a1, a3, 256
+; RV32-NEXT: bnez a1, .LBB16_150
+; RV32-NEXT: .LBB16_9: # %else23
+; RV32-NEXT: andi a1, a3, 512
+; RV32-NEXT: bnez a1, .LBB16_151
+; RV32-NEXT: .LBB16_10: # %else26
+; RV32-NEXT: andi a1, a3, 1024
+; RV32-NEXT: bnez a1, .LBB16_152
+; RV32-NEXT: .LBB16_11: # %else29
+; RV32-NEXT: slli a1, a3, 20
+; RV32-NEXT: bltz a1, .LBB16_153
+; RV32-NEXT: .LBB16_12: # %else32
+; RV32-NEXT: slli a1, a3, 19
+; RV32-NEXT: bltz a1, .LBB16_154
+; RV32-NEXT: .LBB16_13: # %else35
+; RV32-NEXT: slli a1, a3, 18
+; RV32-NEXT: bltz a1, .LBB16_155
+; RV32-NEXT: .LBB16_14: # %else38
+; RV32-NEXT: slli a1, a3, 17
+; RV32-NEXT: bltz a1, .LBB16_156
+; RV32-NEXT: .LBB16_15: # %else41
+; RV32-NEXT: slli a1, a3, 16
+; RV32-NEXT: bgez a1, .LBB16_17
+; RV32-NEXT: .LBB16_16: # %cond.store43
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 15
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_17: # %else44
+; RV32-NEXT: addi sp, sp, -2032
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 2032
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: lui a1, 3
+; RV32-NEXT: addi a1, a1, -1776
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: slli a1, a3, 15
+; RV32-NEXT: lui a2, 3
+; RV32-NEXT: addi a2, a2, -1606
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: bltz a1, .LBB16_157
+; RV32-NEXT: # %bb.18: # %else47
+; RV32-NEXT: slli a1, a3, 14
+; RV32-NEXT: bltz a1, .LBB16_158
+; RV32-NEXT: .LBB16_19: # %else50
+; RV32-NEXT: slli a1, a3, 13
+; RV32-NEXT: bltz a1, .LBB16_159
+; RV32-NEXT: .LBB16_20: # %else53
+; RV32-NEXT: slli a1, a3, 12
+; RV32-NEXT: bltz a1, .LBB16_160
+; RV32-NEXT: .LBB16_21: # %else56
+; RV32-NEXT: slli a1, a3, 11
+; RV32-NEXT: bltz a1, .LBB16_161
+; RV32-NEXT: .LBB16_22: # %else59
+; RV32-NEXT: slli a1, a3, 10
+; RV32-NEXT: bltz a1, .LBB16_162
+; RV32-NEXT: .LBB16_23: # %else62
+; RV32-NEXT: slli a1, a3, 9
+; RV32-NEXT: bltz a1, .LBB16_163
+; RV32-NEXT: .LBB16_24: # %else65
+; RV32-NEXT: slli a1, a3, 8
+; RV32-NEXT: bltz a1, .LBB16_164
+; RV32-NEXT: .LBB16_25: # %else68
+; RV32-NEXT: slli a1, a3, 7
+; RV32-NEXT: bltz a1, .LBB16_165
+; RV32-NEXT: .LBB16_26: # %else71
+; RV32-NEXT: slli a1, a3, 6
+; RV32-NEXT: bltz a1, .LBB16_166
+; RV32-NEXT: .LBB16_27: # %else74
+; RV32-NEXT: slli a1, a3, 5
+; RV32-NEXT: bltz a1, .LBB16_167
+; RV32-NEXT: .LBB16_28: # %else77
+; RV32-NEXT: slli a1, a3, 4
+; RV32-NEXT: bltz a1, .LBB16_168
+; RV32-NEXT: .LBB16_29: # %else80
+; RV32-NEXT: slli a1, a3, 3
+; RV32-NEXT: bgez a1, .LBB16_31
+; RV32-NEXT: .LBB16_30: # %cond.store82
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: li a4, 21
+; RV32-NEXT: slli a4, a4, 9
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 126(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_31: # %else83
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: bgez a4, .LBB16_33
+; RV32-NEXT: # %bb.32: # %cond.store85
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a2, 0(a2)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_33: # %else86
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsrl.vx v24, v0, a1
+; RV32-NEXT: slli a2, a3, 1
+; RV32-NEXT: lui a4, 2
+; RV32-NEXT: addi a4, a4, 348
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: bgez a2, .LBB16_35
+; RV32-NEXT: # %bb.34: # %cond.store88
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a2, 2016(a4)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_35: # %else89
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: bltz a3, .LBB16_169
+; RV32-NEXT: # %bb.36: # %else92
+; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: bnez a3, .LBB16_170
+; RV32-NEXT: .LBB16_37: # %else95
+; RV32-NEXT: andi a3, a2, 2
+; RV32-NEXT: bnez a3, .LBB16_171
+; RV32-NEXT: .LBB16_38: # %else98
+; RV32-NEXT: andi a3, a2, 4
+; RV32-NEXT: bnez a3, .LBB16_172
+; RV32-NEXT: .LBB16_39: # %else101
+; RV32-NEXT: andi a3, a2, 8
+; RV32-NEXT: bnez a3, .LBB16_173
+; RV32-NEXT: .LBB16_40: # %else104
+; RV32-NEXT: andi a3, a2, 16
+; RV32-NEXT: bnez a3, .LBB16_174
+; RV32-NEXT: .LBB16_41: # %else107
+; RV32-NEXT: andi a3, a2, 32
+; RV32-NEXT: bnez a3, .LBB16_175
+; RV32-NEXT: .LBB16_42: # %else110
+; RV32-NEXT: andi a3, a2, 64
+; RV32-NEXT: bnez a3, .LBB16_176
+; RV32-NEXT: .LBB16_43: # %else113
+; RV32-NEXT: andi a3, a2, 128
+; RV32-NEXT: bnez a3, .LBB16_177
+; RV32-NEXT: .LBB16_44: # %else116
+; RV32-NEXT: andi a3, a2, 256
+; RV32-NEXT: bnez a3, .LBB16_178
+; RV32-NEXT: .LBB16_45: # %else119
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: bnez a3, .LBB16_179
+; RV32-NEXT: .LBB16_46: # %else122
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: bnez a3, .LBB16_180
+; RV32-NEXT: .LBB16_47: # %else125
+; RV32-NEXT: slli a3, a2, 20
+; RV32-NEXT: bltz a3, .LBB16_181
+; RV32-NEXT: .LBB16_48: # %else128
+; RV32-NEXT: slli a3, a2, 19
+; RV32-NEXT: bltz a3, .LBB16_182
+; RV32-NEXT: .LBB16_49: # %else131
+; RV32-NEXT: slli a3, a2, 18
+; RV32-NEXT: bltz a3, .LBB16_183
+; RV32-NEXT: .LBB16_50: # %else134
+; RV32-NEXT: slli a3, a2, 17
+; RV32-NEXT: bgez a3, .LBB16_52
+; RV32-NEXT: .LBB16_51: # %cond.store136
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 256
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 0(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_52: # %else137
+; RV32-NEXT: slli a3, a2, 16
+; RV32-NEXT: lui a4, 2
+; RV32-NEXT: addi a4, a4, -1794
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: bltz a3, .LBB16_184
+; RV32-NEXT: # %bb.53: # %else140
+; RV32-NEXT: slli a3, a2, 15
+; RV32-NEXT: bltz a3, .LBB16_185
+; RV32-NEXT: .LBB16_54: # %else143
+; RV32-NEXT: slli a3, a2, 14
+; RV32-NEXT: bltz a3, .LBB16_186
+; RV32-NEXT: .LBB16_55: # %else146
+; RV32-NEXT: slli a3, a2, 13
+; RV32-NEXT: bltz a3, .LBB16_187
+; RV32-NEXT: .LBB16_56: # %else149
+; RV32-NEXT: slli a3, a2, 12
+; RV32-NEXT: bltz a3, .LBB16_188
+; RV32-NEXT: .LBB16_57: # %else152
+; RV32-NEXT: slli a3, a2, 11
+; RV32-NEXT: bltz a3, .LBB16_189
+; RV32-NEXT: .LBB16_58: # %else155
+; RV32-NEXT: slli a3, a2, 10
+; RV32-NEXT: bltz a3, .LBB16_190
+; RV32-NEXT: .LBB16_59: # %else158
+; RV32-NEXT: slli a3, a2, 9
+; RV32-NEXT: bltz a3, .LBB16_191
+; RV32-NEXT: .LBB16_60: # %else161
+; RV32-NEXT: slli a3, a2, 8
+; RV32-NEXT: bltz a3, .LBB16_192
+; RV32-NEXT: .LBB16_61: # %else164
+; RV32-NEXT: slli a3, a2, 7
+; RV32-NEXT: bltz a3, .LBB16_193
+; RV32-NEXT: .LBB16_62: # %else167
+; RV32-NEXT: slli a3, a2, 6
+; RV32-NEXT: bltz a3, .LBB16_194
+; RV32-NEXT: .LBB16_63: # %else170
+; RV32-NEXT: slli a3, a2, 5
+; RV32-NEXT: bltz a3, .LBB16_195
+; RV32-NEXT: .LBB16_64: # %else173
+; RV32-NEXT: slli a3, a2, 4
+; RV32-NEXT: bltz a3, .LBB16_196
+; RV32-NEXT: .LBB16_65: # %else176
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: bltz a3, .LBB16_197
+; RV32-NEXT: .LBB16_66: # %else179
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: bgez a3, .LBB16_68
+; RV32-NEXT: .LBB16_67: # %cond.store181
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 252(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_68: # %else182
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: vslidedown.vi v24, v0, 1
+; RV32-NEXT: bgez a3, .LBB16_70
+; RV32-NEXT: # %bb.69: # %cond.store184
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 25
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 126(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_70: # %else185
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: bltz a2, .LBB16_198
+; RV32-NEXT: # %bb.71: # %else188
+; RV32-NEXT: andi a2, a3, 1
+; RV32-NEXT: bnez a2, .LBB16_199
+; RV32-NEXT: .LBB16_72: # %else191
+; RV32-NEXT: andi a2, a3, 2
+; RV32-NEXT: bnez a2, .LBB16_200
+; RV32-NEXT: .LBB16_73: # %else194
+; RV32-NEXT: andi a2, a3, 4
+; RV32-NEXT: bnez a2, .LBB16_201
+; RV32-NEXT: .LBB16_74: # %else197
+; RV32-NEXT: andi a2, a3, 8
+; RV32-NEXT: bnez a2, .LBB16_202
+; RV32-NEXT: .LBB16_75: # %else200
+; RV32-NEXT: andi a2, a3, 16
+; RV32-NEXT: bnez a2, .LBB16_203
+; RV32-NEXT: .LBB16_76: # %else203
+; RV32-NEXT: andi a2, a3, 32
+; RV32-NEXT: bnez a2, .LBB16_204
+; RV32-NEXT: .LBB16_77: # %else206
+; RV32-NEXT: andi a2, a3, 64
+; RV32-NEXT: bnez a2, .LBB16_205
+; RV32-NEXT: .LBB16_78: # %else209
+; RV32-NEXT: andi a2, a3, 128
+; RV32-NEXT: bnez a2, .LBB16_206
+; RV32-NEXT: .LBB16_79: # %else212
+; RV32-NEXT: andi a2, a3, 256
+; RV32-NEXT: bnez a2, .LBB16_207
+; RV32-NEXT: .LBB16_80: # %else215
+; RV32-NEXT: andi a2, a3, 512
+; RV32-NEXT: bnez a2, .LBB16_208
+; RV32-NEXT: .LBB16_81: # %else218
+; RV32-NEXT: andi a2, a3, 1024
+; RV32-NEXT: bnez a2, .LBB16_209
+; RV32-NEXT: .LBB16_82: # %else221
+; RV32-NEXT: slli a2, a3, 20
+; RV32-NEXT: bltz a2, .LBB16_210
+; RV32-NEXT: .LBB16_83: # %else224
+; RV32-NEXT: slli a2, a3, 19
+; RV32-NEXT: bltz a2, .LBB16_211
+; RV32-NEXT: .LBB16_84: # %else227
+; RV32-NEXT: slli a2, a3, 18
+; RV32-NEXT: bltz a2, .LBB16_212
+; RV32-NEXT: .LBB16_85: # %else230
+; RV32-NEXT: slli a2, a3, 17
+; RV32-NEXT: bltz a2, .LBB16_213
+; RV32-NEXT: .LBB16_86: # %else233
+; RV32-NEXT: slli a2, a3, 16
+; RV32-NEXT: bgez a2, .LBB16_88
+; RV32-NEXT: .LBB16_87: # %cond.store235
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 15
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_88: # %else236
+; RV32-NEXT: slli a4, a3, 15
+; RV32-NEXT: lui a2, 1
+; RV32-NEXT: addi a2, a2, 64
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: bltz a4, .LBB16_214
+; RV32-NEXT: # %bb.89: # %else239
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bgez a4, .LBB16_90
+; RV32-NEXT: j .LBB16_215
+; RV32-NEXT: .LBB16_90: # %else242
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bgez a4, .LBB16_91
+; RV32-NEXT: j .LBB16_216
+; RV32-NEXT: .LBB16_91: # %else245
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bgez a4, .LBB16_92
+; RV32-NEXT: j .LBB16_217
+; RV32-NEXT: .LBB16_92: # %else248
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bgez a4, .LBB16_93
+; RV32-NEXT: j .LBB16_218
+; RV32-NEXT: .LBB16_93: # %else251
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bgez a4, .LBB16_94
+; RV32-NEXT: j .LBB16_219
+; RV32-NEXT: .LBB16_94: # %else254
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bgez a4, .LBB16_95
+; RV32-NEXT: j .LBB16_220
+; RV32-NEXT: .LBB16_95: # %else257
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bgez a4, .LBB16_96
+; RV32-NEXT: j .LBB16_221
+; RV32-NEXT: .LBB16_96: # %else260
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bgez a4, .LBB16_97
+; RV32-NEXT: j .LBB16_222
+; RV32-NEXT: .LBB16_97: # %else263
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bgez a4, .LBB16_98
+; RV32-NEXT: j .LBB16_223
+; RV32-NEXT: .LBB16_98: # %else266
+; RV32-NEXT: slli a4, a3, 5
+; RV32-NEXT: bgez a4, .LBB16_99
+; RV32-NEXT: j .LBB16_224
+; RV32-NEXT: .LBB16_99: # %else269
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: bgez a4, .LBB16_100
+; RV32-NEXT: j .LBB16_225
+; RV32-NEXT: .LBB16_100: # %else272
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: bgez a4, .LBB16_101
+; RV32-NEXT: j .LBB16_226
+; RV32-NEXT: .LBB16_101: # %else275
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: bgez a4, .LBB16_103
+; RV32-NEXT: .LBB16_102: # %cond.store277
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 378(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_103: # %else278
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: vsrl.vx v8, v24, a1
+; RV32-NEXT: bgez a4, .LBB16_105
+; RV32-NEXT: # %bb.104: # %cond.store280
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: li a4, 17
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a1, 252(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_105: # %else281
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: bgez a3, .LBB16_107
+; RV32-NEXT: # %bb.106: # %cond.store283
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 128
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 126(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_107: # %else284
+; RV32-NEXT: andi a3, a1, 1
+; RV32-NEXT: beqz a3, .LBB16_109
+; RV32-NEXT: # %bb.108: # %cond.store286
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a2, 0(a2)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: .LBB16_109: # %else287
+; RV32-NEXT: andi a3, a1, 2
+; RV32-NEXT: addi a2, sp, 2018
+; RV32-NEXT: beqz a3, .LBB16_110
+; RV32-NEXT: j .LBB16_227
+; RV32-NEXT: .LBB16_110: # %else290
+; RV32-NEXT: andi a3, a1, 4
+; RV32-NEXT: beqz a3, .LBB16_111
+; RV32-NEXT: j .LBB16_228
+; RV32-NEXT: .LBB16_111: # %else293
+; RV32-NEXT: andi a3, a1, 8
+; RV32-NEXT: beqz a3, .LBB16_112
+; RV32-NEXT: j .LBB16_229
+; RV32-NEXT: .LBB16_112: # %else296
+; RV32-NEXT: andi a3, a1, 16
+; RV32-NEXT: beqz a3, .LBB16_113
+; RV32-NEXT: j .LBB16_230
+; RV32-NEXT: .LBB16_113: # %else299
+; RV32-NEXT: andi a3, a1, 32
+; RV32-NEXT: beqz a3, .LBB16_114
+; RV32-NEXT: j .LBB16_231
+; RV32-NEXT: .LBB16_114: # %else302
+; RV32-NEXT: andi a3, a1, 64
+; RV32-NEXT: beqz a3, .LBB16_115
+; RV32-NEXT: j .LBB16_232
+; RV32-NEXT: .LBB16_115: # %else305
+; RV32-NEXT: andi a3, a1, 128
+; RV32-NEXT: beqz a3, .LBB16_116
+; RV32-NEXT: j .LBB16_233
+; RV32-NEXT: .LBB16_116: # %else308
+; RV32-NEXT: andi a3, a1, 256
+; RV32-NEXT: beqz a3, .LBB16_117
+; RV32-NEXT: j .LBB16_234
+; RV32-NEXT: .LBB16_117: # %else311
+; RV32-NEXT: andi a3, a1, 512
+; RV32-NEXT: beqz a3, .LBB16_118
+; RV32-NEXT: j .LBB16_235
+; RV32-NEXT: .LBB16_118: # %else314
+; RV32-NEXT: andi a3, a1, 1024
+; RV32-NEXT: beqz a3, .LBB16_119
+; RV32-NEXT: j .LBB16_236
+; RV32-NEXT: .LBB16_119: # %else317
+; RV32-NEXT: slli a3, a1, 20
+; RV32-NEXT: bgez a3, .LBB16_120
+; RV32-NEXT: j .LBB16_237
+; RV32-NEXT: .LBB16_120: # %else320
+; RV32-NEXT: slli a3, a1, 19
+; RV32-NEXT: bgez a3, .LBB16_121
+; RV32-NEXT: j .LBB16_238
+; RV32-NEXT: .LBB16_121: # %else323
+; RV32-NEXT: slli a3, a1, 18
+; RV32-NEXT: bgez a3, .LBB16_122
+; RV32-NEXT: j .LBB16_239
+; RV32-NEXT: .LBB16_122: # %else326
+; RV32-NEXT: slli a3, a1, 17
+; RV32-NEXT: bgez a3, .LBB16_123
+; RV32-NEXT: j .LBB16_240
+; RV32-NEXT: .LBB16_123: # %else329
+; RV32-NEXT: slli a3, a1, 16
+; RV32-NEXT: bgez a3, .LBB16_124
+; RV32-NEXT: j .LBB16_241
+; RV32-NEXT: .LBB16_124: # %else332
+; RV32-NEXT: slli a3, a1, 15
+; RV32-NEXT: bgez a3, .LBB16_125
+; RV32-NEXT: j .LBB16_242
+; RV32-NEXT: .LBB16_125: # %else335
+; RV32-NEXT: slli a3, a1, 14
+; RV32-NEXT: bgez a3, .LBB16_126
+; RV32-NEXT: j .LBB16_243
+; RV32-NEXT: .LBB16_126: # %else338
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bgez a2, .LBB16_127
+; RV32-NEXT: j .LBB16_244
+; RV32-NEXT: .LBB16_127: # %else341
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bgez a2, .LBB16_128
+; RV32-NEXT: j .LBB16_245
+; RV32-NEXT: .LBB16_128: # %else344
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bgez a2, .LBB16_129
+; RV32-NEXT: j .LBB16_246
+; RV32-NEXT: .LBB16_129: # %else347
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bgez a2, .LBB16_130
+; RV32-NEXT: j .LBB16_247
+; RV32-NEXT: .LBB16_130: # %else350
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bgez a2, .LBB16_131
+; RV32-NEXT: j .LBB16_248
+; RV32-NEXT: .LBB16_131: # %else353
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bgez a2, .LBB16_132
+; RV32-NEXT: j .LBB16_249
+; RV32-NEXT: .LBB16_132: # %else356
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bgez a2, .LBB16_133
+; RV32-NEXT: j .LBB16_250
+; RV32-NEXT: .LBB16_133: # %else359
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bgez a2, .LBB16_134
+; RV32-NEXT: j .LBB16_251
+; RV32-NEXT: .LBB16_134: # %else362
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bgez a2, .LBB16_135
+; RV32-NEXT: j .LBB16_252
+; RV32-NEXT: .LBB16_135: # %else365
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bgez a2, .LBB16_136
+; RV32-NEXT: j .LBB16_253
+; RV32-NEXT: .LBB16_136: # %else368
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bgez a2, .LBB16_137
+; RV32-NEXT: j .LBB16_254
+; RV32-NEXT: .LBB16_137: # %else371
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bgez a2, .LBB16_138
+; RV32-NEXT: j .LBB16_255
+; RV32-NEXT: .LBB16_138: # %else374
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bgez a2, .LBB16_139
+; RV32-NEXT: j .LBB16_256
+; RV32-NEXT: .LBB16_139: # %else377
+; RV32-NEXT: bgez a1, .LBB16_141
+; RV32-NEXT: .LBB16_140: # %cond.store379
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: addi a2, sp, 128
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a2)
+; RV32-NEXT: lh a1, 254(sp)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: .LBB16_141: # %else380
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 256
+; RV32-NEXT: sub sp, s0, a0
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, -1776
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 2032
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB16_142: # %cond.store
+; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 2
+; RV32-NEXT: beqz a1, .LBB16_2
+; RV32-NEXT: .LBB16_143: # %cond.store1
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 1
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 4
+; RV32-NEXT: beqz a1, .LBB16_3
+; RV32-NEXT: .LBB16_144: # %cond.store4
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 2
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 8
+; RV32-NEXT: beqz a1, .LBB16_4
+; RV32-NEXT: .LBB16_145: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 3
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 16
+; RV32-NEXT: beqz a1, .LBB16_5
+; RV32-NEXT: .LBB16_146: # %cond.store10
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 4
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 32
+; RV32-NEXT: beqz a1, .LBB16_6
+; RV32-NEXT: .LBB16_147: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 5
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 64
+; RV32-NEXT: beqz a1, .LBB16_7
+; RV32-NEXT: .LBB16_148: # %cond.store16
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 6
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 128
+; RV32-NEXT: beqz a1, .LBB16_8
+; RV32-NEXT: .LBB16_149: # %cond.store19
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 7
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 256
+; RV32-NEXT: beqz a1, .LBB16_9
+; RV32-NEXT: .LBB16_150: # %cond.store22
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 8
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 512
+; RV32-NEXT: beqz a1, .LBB16_10
+; RV32-NEXT: .LBB16_151: # %cond.store25
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 9
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a1, a3, 1024
+; RV32-NEXT: beqz a1, .LBB16_11
+; RV32-NEXT: .LBB16_152: # %cond.store28
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 10
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 20
+; RV32-NEXT: bgez a1, .LBB16_12
+; RV32-NEXT: .LBB16_153: # %cond.store31
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 11
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 19
+; RV32-NEXT: bgez a1, .LBB16_13
+; RV32-NEXT: .LBB16_154: # %cond.store34
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 12
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 18
+; RV32-NEXT: bgez a1, .LBB16_14
+; RV32-NEXT: .LBB16_155: # %cond.store37
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 13
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 17
+; RV32-NEXT: bgez a1, .LBB16_15
+; RV32-NEXT: .LBB16_156: # %cond.store40
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 14
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 16
+; RV32-NEXT: bltz a1, .LBB16_16
+; RV32-NEXT: j .LBB16_17
+; RV32-NEXT: .LBB16_157: # %cond.store46
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 1638(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 14
+; RV32-NEXT: bgez a1, .LBB16_19
+; RV32-NEXT: .LBB16_158: # %cond.store49
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -128
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 1512(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 13
+; RV32-NEXT: bgez a1, .LBB16_20
+; RV32-NEXT: .LBB16_159: # %cond.store52
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -256
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 1386(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 12
+; RV32-NEXT: bgez a1, .LBB16_21
+; RV32-NEXT: .LBB16_160: # %cond.store55
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -384
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 1260(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 11
+; RV32-NEXT: bgez a1, .LBB16_22
+; RV32-NEXT: .LBB16_161: # %cond.store58
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: li a4, 23
+; RV32-NEXT: slli a4, a4, 9
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 1134(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 10
+; RV32-NEXT: bgez a1, .LBB16_23
+; RV32-NEXT: .LBB16_162: # %cond.store61
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -640
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 1008(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 9
+; RV32-NEXT: bgez a1, .LBB16_24
+; RV32-NEXT: .LBB16_163: # %cond.store64
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -768
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 882(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 8
+; RV32-NEXT: bgez a1, .LBB16_25
+; RV32-NEXT: .LBB16_164: # %cond.store67
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -896
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 756(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 7
+; RV32-NEXT: bgez a1, .LBB16_26
+; RV32-NEXT: .LBB16_165: # %cond.store70
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: li a4, 11
+; RV32-NEXT: slli a4, a4, 10
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 630(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 6
+; RV32-NEXT: bgez a1, .LBB16_27
+; RV32-NEXT: .LBB16_166: # %cond.store73
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -1152
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 504(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 5
+; RV32-NEXT: bgez a1, .LBB16_28
+; RV32-NEXT: .LBB16_167: # %cond.store76
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -1280
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 378(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 4
+; RV32-NEXT: bgez a1, .LBB16_29
+; RV32-NEXT: .LBB16_168: # %cond.store79
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: lui a4, 3
+; RV32-NEXT: addi a4, a4, -1408
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a4)
+; RV32-NEXT: lh a1, 252(a2)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a1, a3, 3
+; RV32-NEXT: bltz a1, .LBB16_30
+; RV32-NEXT: j .LBB16_31
+; RV32-NEXT: .LBB16_169: # %cond.store91
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 3
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1890(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: beqz a3, .LBB16_37
+; RV32-NEXT: .LBB16_170: # %cond.store94
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1764(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 2
+; RV32-NEXT: beqz a3, .LBB16_38
+; RV32-NEXT: .LBB16_171: # %cond.store97
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1638(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 4
+; RV32-NEXT: beqz a3, .LBB16_39
+; RV32-NEXT: .LBB16_172: # %cond.store100
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1792
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1512(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 8
+; RV32-NEXT: beqz a3, .LBB16_40
+; RV32-NEXT: .LBB16_173: # %cond.store103
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1386(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 16
+; RV32-NEXT: beqz a3, .LBB16_41
+; RV32-NEXT: .LBB16_174: # %cond.store106
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 19
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1260(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 32
+; RV32-NEXT: beqz a3, .LBB16_42
+; RV32-NEXT: .LBB16_175: # %cond.store109
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1134(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 64
+; RV32-NEXT: beqz a3, .LBB16_43
+; RV32-NEXT: .LBB16_176: # %cond.store112
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1280
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1008(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 128
+; RV32-NEXT: beqz a3, .LBB16_44
+; RV32-NEXT: .LBB16_177: # %cond.store115
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 882(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 256
+; RV32-NEXT: beqz a3, .LBB16_45
+; RV32-NEXT: .LBB16_178: # %cond.store118
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 9
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 756(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: beqz a3, .LBB16_46
+; RV32-NEXT: .LBB16_179: # %cond.store121
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 630(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: beqz a3, .LBB16_47
+; RV32-NEXT: .LBB16_180: # %cond.store124
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 768
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 504(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 20
+; RV32-NEXT: bgez a3, .LBB16_48
+; RV32-NEXT: .LBB16_181: # %cond.store127
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 378(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 19
+; RV32-NEXT: bgez a3, .LBB16_49
+; RV32-NEXT: .LBB16_182: # %cond.store130
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 17
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 252(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 18
+; RV32-NEXT: bgez a3, .LBB16_50
+; RV32-NEXT: .LBB16_183: # %cond.store133
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 126(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 17
+; RV32-NEXT: bltz a3, .LBB16_51
+; RV32-NEXT: j .LBB16_52
+; RV32-NEXT: .LBB16_184: # %cond.store139
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, 128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 2016(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 15
+; RV32-NEXT: bgez a3, .LBB16_54
+; RV32-NEXT: .LBB16_185: # %cond.store142
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1890(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 14
+; RV32-NEXT: bgez a3, .LBB16_55
+; RV32-NEXT: .LBB16_186: # %cond.store145
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -128
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1764(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 13
+; RV32-NEXT: bgez a3, .LBB16_56
+; RV32-NEXT: .LBB16_187: # %cond.store148
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 31
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1638(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 12
+; RV32-NEXT: bgez a3, .LBB16_57
+; RV32-NEXT: .LBB16_188: # %cond.store151
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -384
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1512(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 11
+; RV32-NEXT: bgez a3, .LBB16_58
+; RV32-NEXT: .LBB16_189: # %cond.store154
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 15
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1386(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 10
+; RV32-NEXT: bgez a3, .LBB16_59
+; RV32-NEXT: .LBB16_190: # %cond.store157
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1260(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 9
+; RV32-NEXT: bgez a3, .LBB16_60
+; RV32-NEXT: .LBB16_191: # %cond.store160
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 29
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1134(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 8
+; RV32-NEXT: bgez a3, .LBB16_61
+; RV32-NEXT: .LBB16_192: # %cond.store163
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 1008(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 7
+; RV32-NEXT: bgez a3, .LBB16_62
+; RV32-NEXT: .LBB16_193: # %cond.store166
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 7
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 882(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 6
+; RV32-NEXT: bgez a3, .LBB16_63
+; RV32-NEXT: .LBB16_194: # %cond.store169
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 756(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 5
+; RV32-NEXT: bgez a3, .LBB16_64
+; RV32-NEXT: .LBB16_195: # %cond.store172
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 27
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 630(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 4
+; RV32-NEXT: bgez a3, .LBB16_65
+; RV32-NEXT: .LBB16_196: # %cond.store175
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 504(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: bgez a3, .LBB16_66
+; RV32-NEXT: .LBB16_197: # %cond.store178
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: li a5, 13
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a3, 378(a4)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: bltz a3, .LBB16_67
+; RV32-NEXT: j .LBB16_68
+; RV32-NEXT: .LBB16_198: # %cond.store187
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: lui a5, 2
+; RV32-NEXT: addi a5, a5, -1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v8, (a5)
+; RV32-NEXT: lh a2, 0(a4)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 1
+; RV32-NEXT: beqz a2, .LBB16_72
+; RV32-NEXT: .LBB16_199: # %cond.store190
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v16
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 2
+; RV32-NEXT: beqz a2, .LBB16_73
+; RV32-NEXT: .LBB16_200: # %cond.store193
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 1
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 4
+; RV32-NEXT: beqz a2, .LBB16_74
+; RV32-NEXT: .LBB16_201: # %cond.store196
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 2
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 8
+; RV32-NEXT: beqz a2, .LBB16_75
+; RV32-NEXT: .LBB16_202: # %cond.store199
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 3
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 16
+; RV32-NEXT: beqz a2, .LBB16_76
+; RV32-NEXT: .LBB16_203: # %cond.store202
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 4
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 32
+; RV32-NEXT: beqz a2, .LBB16_77
+; RV32-NEXT: .LBB16_204: # %cond.store205
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 5
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 64
+; RV32-NEXT: beqz a2, .LBB16_78
+; RV32-NEXT: .LBB16_205: # %cond.store208
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 6
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 128
+; RV32-NEXT: beqz a2, .LBB16_79
+; RV32-NEXT: .LBB16_206: # %cond.store211
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 7
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 256
+; RV32-NEXT: beqz a2, .LBB16_80
+; RV32-NEXT: .LBB16_207: # %cond.store214
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 8
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 512
+; RV32-NEXT: beqz a2, .LBB16_81
+; RV32-NEXT: .LBB16_208: # %cond.store217
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 9
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a2, a3, 1024
+; RV32-NEXT: beqz a2, .LBB16_82
+; RV32-NEXT: .LBB16_209: # %cond.store220
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 10
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a3, 20
+; RV32-NEXT: bgez a2, .LBB16_83
+; RV32-NEXT: .LBB16_210: # %cond.store223
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 11
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a3, 19
+; RV32-NEXT: bgez a2, .LBB16_84
+; RV32-NEXT: .LBB16_211: # %cond.store226
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 12
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a3, 18
+; RV32-NEXT: bgez a2, .LBB16_85
+; RV32-NEXT: .LBB16_212: # %cond.store229
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 13
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a3, 17
+; RV32-NEXT: bgez a2, .LBB16_86
+; RV32-NEXT: .LBB16_213: # %cond.store232
+; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 14
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a3, 16
+; RV32-NEXT: bgez a2, .LBB16_88
+; RV32-NEXT: j .LBB16_87
+; RV32-NEXT: .LBB16_214: # %cond.store238
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 3
+; RV32-NEXT: slli a5, a5, 11
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 2016(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 14
+; RV32-NEXT: bltz a4, .LBB16_215
+; RV32-NEXT: j .LBB16_90
+; RV32-NEXT: .LBB16_215: # %cond.store241
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1920
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1890(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 13
+; RV32-NEXT: bltz a4, .LBB16_216
+; RV32-NEXT: j .LBB16_91
+; RV32-NEXT: .LBB16_216: # %cond.store244
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 23
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1764(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 12
+; RV32-NEXT: bltz a4, .LBB16_217
+; RV32-NEXT: j .LBB16_92
+; RV32-NEXT: .LBB16_217: # %cond.store247
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1664
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1638(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 11
+; RV32-NEXT: bltz a4, .LBB16_218
+; RV32-NEXT: j .LBB16_93
+; RV32-NEXT: .LBB16_218: # %cond.store250
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 11
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1512(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 10
+; RV32-NEXT: bltz a4, .LBB16_219
+; RV32-NEXT: j .LBB16_94
+; RV32-NEXT: .LBB16_219: # %cond.store253
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1408
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1386(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 9
+; RV32-NEXT: bltz a4, .LBB16_220
+; RV32-NEXT: j .LBB16_95
+; RV32-NEXT: .LBB16_220: # %cond.store256
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 21
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1260(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: bltz a4, .LBB16_221
+; RV32-NEXT: j .LBB16_96
+; RV32-NEXT: .LBB16_221: # %cond.store259
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 1152
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1134(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 7
+; RV32-NEXT: bltz a4, .LBB16_222
+; RV32-NEXT: j .LBB16_97
+; RV32-NEXT: .LBB16_222: # %cond.store262
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: slli a5, a5, 10
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 1008(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 6
+; RV32-NEXT: bltz a4, .LBB16_223
+; RV32-NEXT: j .LBB16_98
+; RV32-NEXT: .LBB16_223: # %cond.store265
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 896
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 882(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 5
+; RV32-NEXT: bltz a4, .LBB16_224
+; RV32-NEXT: j .LBB16_99
+; RV32-NEXT: .LBB16_224: # %cond.store268
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 19
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 756(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: bltz a4, .LBB16_225
+; RV32-NEXT: j .LBB16_100
+; RV32-NEXT: .LBB16_225: # %cond.store271
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: addi a5, a5, 640
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 630(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: bltz a4, .LBB16_226
+; RV32-NEXT: j .LBB16_101
+; RV32-NEXT: .LBB16_226: # %cond.store274
+; RV32-NEXT: li a4, 64
+; RV32-NEXT: li a5, 9
+; RV32-NEXT: slli a5, a5, 9
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a5)
+; RV32-NEXT: lh a4, 504(a2)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: bgez a4, .LBB16_257
+; RV32-NEXT: j .LBB16_102
+; RV32-NEXT: .LBB16_257: # %cond.store274
+; RV32-NEXT: j .LBB16_103
+; RV32-NEXT: .LBB16_227: # %cond.store289
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1921
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 2016(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 4
+; RV32-NEXT: bnez a3, .LBB16_228
+; RV32-NEXT: j .LBB16_111
+; RV32-NEXT: .LBB16_228: # %cond.store292
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1793
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1890(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 8
+; RV32-NEXT: bnez a3, .LBB16_229
+; RV32-NEXT: j .LBB16_112
+; RV32-NEXT: .LBB16_229: # %cond.store295
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1665
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1764(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 16
+; RV32-NEXT: bnez a3, .LBB16_230
+; RV32-NEXT: j .LBB16_113
+; RV32-NEXT: .LBB16_230: # %cond.store298
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1537
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1638(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 32
+; RV32-NEXT: bnez a3, .LBB16_231
+; RV32-NEXT: j .LBB16_114
+; RV32-NEXT: .LBB16_231: # %cond.store301
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1409
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1512(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 64
+; RV32-NEXT: bnez a3, .LBB16_232
+; RV32-NEXT: j .LBB16_115
+; RV32-NEXT: .LBB16_232: # %cond.store304
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1281
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1386(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 128
+; RV32-NEXT: bnez a3, .LBB16_233
+; RV32-NEXT: j .LBB16_116
+; RV32-NEXT: .LBB16_233: # %cond.store307
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1153
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1260(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 256
+; RV32-NEXT: bnez a3, .LBB16_234
+; RV32-NEXT: j .LBB16_117
+; RV32-NEXT: .LBB16_234: # %cond.store310
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1025
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1134(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 512
+; RV32-NEXT: bnez a3, .LBB16_235
+; RV32-NEXT: j .LBB16_118
+; RV32-NEXT: .LBB16_235: # %cond.store313
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 897
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 1008(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: andi a3, a1, 1024
+; RV32-NEXT: bnez a3, .LBB16_236
+; RV32-NEXT: j .LBB16_119
+; RV32-NEXT: .LBB16_236: # %cond.store316
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 769
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 882(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 20
+; RV32-NEXT: bltz a3, .LBB16_237
+; RV32-NEXT: j .LBB16_120
+; RV32-NEXT: .LBB16_237: # %cond.store319
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 641
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 756(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 19
+; RV32-NEXT: bltz a3, .LBB16_238
+; RV32-NEXT: j .LBB16_121
+; RV32-NEXT: .LBB16_238: # %cond.store322
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 513
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 630(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 18
+; RV32-NEXT: bltz a3, .LBB16_239
+; RV32-NEXT: j .LBB16_122
+; RV32-NEXT: .LBB16_239: # %cond.store325
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 385
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 504(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 17
+; RV32-NEXT: bltz a3, .LBB16_240
+; RV32-NEXT: j .LBB16_123
+; RV32-NEXT: .LBB16_240: # %cond.store328
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 257
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 378(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 16
+; RV32-NEXT: bltz a3, .LBB16_241
+; RV32-NEXT: j .LBB16_124
+; RV32-NEXT: .LBB16_241: # %cond.store331
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 129
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 252(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 15
+; RV32-NEXT: bltz a3, .LBB16_242
+; RV32-NEXT: j .LBB16_125
+; RV32-NEXT: .LBB16_242: # %cond.store334
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a3, 126(a2)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a3, a1, 14
+; RV32-NEXT: bltz a3, .LBB16_243
+; RV32-NEXT: j .LBB16_126
+; RV32-NEXT: .LBB16_243: # %cond.store337
+; RV32-NEXT: li a3, 64
+; RV32-NEXT: addi a4, sp, 1920
+; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a4)
+; RV32-NEXT: lh a2, 0(a2)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bltz a2, .LBB16_244
+; RV32-NEXT: j .LBB16_127
+; RV32-NEXT: .LBB16_244: # %cond.store340
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1792
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1892(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bltz a2, .LBB16_245
+; RV32-NEXT: j .LBB16_128
+; RV32-NEXT: .LBB16_245: # %cond.store343
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1664
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1766(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bltz a2, .LBB16_246
+; RV32-NEXT: j .LBB16_129
+; RV32-NEXT: .LBB16_246: # %cond.store346
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1536
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1640(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bltz a2, .LBB16_247
+; RV32-NEXT: j .LBB16_130
+; RV32-NEXT: .LBB16_247: # %cond.store349
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1408
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1514(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bltz a2, .LBB16_248
+; RV32-NEXT: j .LBB16_131
+; RV32-NEXT: .LBB16_248: # %cond.store352
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1280
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1388(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bltz a2, .LBB16_249
+; RV32-NEXT: j .LBB16_132
+; RV32-NEXT: .LBB16_249: # %cond.store355
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1152
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1262(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bltz a2, .LBB16_250
+; RV32-NEXT: j .LBB16_133
+; RV32-NEXT: .LBB16_250: # %cond.store358
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 1024
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1136(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bltz a2, .LBB16_251
+; RV32-NEXT: j .LBB16_134
+; RV32-NEXT: .LBB16_251: # %cond.store361
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 896
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 1010(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bltz a2, .LBB16_252
+; RV32-NEXT: j .LBB16_135
+; RV32-NEXT: .LBB16_252: # %cond.store364
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 768
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 884(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bltz a2, .LBB16_253
+; RV32-NEXT: j .LBB16_136
+; RV32-NEXT: .LBB16_253: # %cond.store367
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 640
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 758(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bltz a2, .LBB16_254
+; RV32-NEXT: j .LBB16_137
+; RV32-NEXT: .LBB16_254: # %cond.store370
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 512
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 632(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bltz a2, .LBB16_255
+; RV32-NEXT: j .LBB16_138
+; RV32-NEXT: .LBB16_255: # %cond.store373
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 384
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 506(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bltz a2, .LBB16_256
+; RV32-NEXT: j .LBB16_139
+; RV32-NEXT: .LBB16_256: # %cond.store376
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: addi a3, sp, 256
+; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT: vse16.v v16, (a3)
+; RV32-NEXT: lh a2, 380(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: bgez a1, .LBB16_258
+; RV32-NEXT: j .LBB16_140
+; RV32-NEXT: .LBB16_258: # %cond.store376
+; RV32-NEXT: j .LBB16_141
+entry:
+ tail call void @llvm.masked.compressstore.v128i16(<128 x i16> %data, ptr %p, <128 x i1> %mask)
+ ret void
+}
+
+; Compress + store for i32 type
+
+define void @test_compresstore_i32_v1(ptr %p, <1 x i1> %mask, <1 x i32> %data) { ; <1 x i32> compress-store; both check prefixes expect vcompress.vm, then vcpop.m into a1, then vse32.v with VL taken from a1 (e32, mf2)
+; RV64-LABEL: test_compresstore_i32_v1:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i32_v1:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i32(<1 x i32> %data, ptr %p, <1 x i1> %mask) ; intrinsic operands in order - value vector, destination pointer, per-lane mask
+ ret void
+}
+
+; <2 x i32> compressstore. Expected code is identical for the RV64 and RV32
+; check blocks: AVL 2 at e32/mf2, vcompress.vm v8 -> v9 under mask v0, vcpop.m
+; to get the number of active elements, then vse32.v of that many elements.
+define void @test_compresstore_i32_v2(ptr %p, <2 x i1> %mask, <2 x i32> %data) {
+; RV64-LABEL: test_compresstore_i32_v2:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i32_v2:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i32(<2 x i32> %data, ptr %p, <2 x i1> %mask)
+ ret void
+}
+
+; <4 x i32> compressstore. Same vcompress.vm / vcpop.m / vse32.v sequence as
+; the smaller i32 cases, but the checks expect LMUL m1 (AVL 4) instead of mf2.
+; The RV64 and RV32 expectations are identical.
+define void @test_compresstore_i32_v4(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
+; RV64-LABEL: test_compresstore_i32_v4:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i32_v4:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %data, ptr %p, <4 x i1> %mask)
+ ret void
+}
+
+; <8 x i32> compressstore. The checks expect LMUL m2 (AVL 8); the compressed
+; result is written to v10 rather than v9, and vse32.v stores vcpop.m(v0)
+; elements from it. The RV64 and RV32 expectations are identical.
+define void @test_compresstore_i32_v8(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
+; RV64-LABEL: test_compresstore_i32_v8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT: vse32.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i32_v8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT: vse32.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %data, ptr %p, <8 x i1> %mask)
+ ret void
+}
+
+; <16 x i32> compressstore. The checks expect LMUL m4 (AVL 16) with the
+; compressed result in v12, followed by vcpop.m and a vse32.v whose VL is the
+; mask population count. The RV64 and RV32 expectations are identical.
+define void @test_compresstore_i32_v16(ptr %p, <16 x i1> %mask, <16 x i32> %data) {
+; RV64-LABEL: test_compresstore_i32_v16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vse32.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i32_v16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT: vse32.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %data, ptr %p, <16 x i1> %mask)
+ ret void
+}
+
+; <32 x i32> compressstore. The checks expect LMUL m8 with the compressed
+; result in v16. Unlike the smaller cases, the AVL of 32 is materialised with
+; `li` and passed to vsetvli in a register (presumably because 32 does not fit
+; the vsetivli immediate; confirm against the V spec). The rest of the
+; sequence (vcpop.m, then vse32.v with VL = popcount) is unchanged, and the
+; RV64 and RV32 expectations are identical.
+define void @test_compresstore_i32_v32(ptr %p, <32 x i1> %mask, <32 x i32> %data) {
+; RV64-LABEL: test_compresstore_i32_v32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i32_v32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i32(<32 x i32> %data, ptr %p, <32 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data) {
+; RV64-LABEL: test_compresstore_i32_v64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a1, v0
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: bnez a2, .LBB23_69
+; RV64-NEXT: # %bb.1: # %else
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: bnez a2, .LBB23_70
+; RV64-NEXT: .LBB23_2: # %else2
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: bnez a2, .LBB23_71
+; RV64-NEXT: .LBB23_3: # %else5
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: bnez a2, .LBB23_72
+; RV64-NEXT: .LBB23_4: # %else8
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: bnez a2, .LBB23_73
+; RV64-NEXT: .LBB23_5: # %else11
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: bnez a2, .LBB23_74
+; RV64-NEXT: .LBB23_6: # %else14
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: bnez a2, .LBB23_75
+; RV64-NEXT: .LBB23_7: # %else17
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: beqz a2, .LBB23_9
+; RV64-NEXT: .LBB23_8: # %cond.store19
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 7
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: .LBB23_9: # %else20
+; RV64-NEXT: addi sp, sp, -2032
+; RV64-NEXT: .cfi_def_cfa_offset 2032
+; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 2032
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: lui a2, 1
+; RV64-NEXT: addiw a2, a2, 272
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: andi a2, a1, 256
+; RV64-NEXT: beqz a2, .LBB23_11
+; RV64-NEXT: # %bb.10: # %cond.store22
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: slli a3, a3, 11
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a3)
+; RV64-NEXT: lui a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: lw a2, -2016(a2)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: .LBB23_11: # %else23
+; RV64-NEXT: andi a3, a1, 512
+; RV64-NEXT: addi a2, sp, 2047
+; RV64-NEXT: addi a2, a2, 2021
+; RV64-NEXT: bnez a3, .LBB23_76
+; RV64-NEXT: # %bb.12: # %else26
+; RV64-NEXT: andi a3, a1, 1024
+; RV64-NEXT: bnez a3, .LBB23_77
+; RV64-NEXT: .LBB23_13: # %else29
+; RV64-NEXT: slli a3, a1, 52
+; RV64-NEXT: bltz a3, .LBB23_78
+; RV64-NEXT: .LBB23_14: # %else32
+; RV64-NEXT: slli a3, a1, 51
+; RV64-NEXT: bltz a3, .LBB23_79
+; RV64-NEXT: .LBB23_15: # %else35
+; RV64-NEXT: slli a3, a1, 50
+; RV64-NEXT: bltz a3, .LBB23_80
+; RV64-NEXT: .LBB23_16: # %else38
+; RV64-NEXT: slli a3, a1, 49
+; RV64-NEXT: bltz a3, .LBB23_81
+; RV64-NEXT: .LBB23_17: # %else41
+; RV64-NEXT: slli a3, a1, 48
+; RV64-NEXT: bltz a3, .LBB23_82
+; RV64-NEXT: .LBB23_18: # %else44
+; RV64-NEXT: slli a3, a1, 47
+; RV64-NEXT: bltz a3, .LBB23_83
+; RV64-NEXT: .LBB23_19: # %else47
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bltz a3, .LBB23_84
+; RV64-NEXT: .LBB23_20: # %else50
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bltz a3, .LBB23_85
+; RV64-NEXT: .LBB23_21: # %else53
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bltz a3, .LBB23_86
+; RV64-NEXT: .LBB23_22: # %else56
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bltz a3, .LBB23_87
+; RV64-NEXT: .LBB23_23: # %else59
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bltz a3, .LBB23_88
+; RV64-NEXT: .LBB23_24: # %else62
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bltz a3, .LBB23_89
+; RV64-NEXT: .LBB23_25: # %else65
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bltz a3, .LBB23_90
+; RV64-NEXT: .LBB23_26: # %else68
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bltz a3, .LBB23_91
+; RV64-NEXT: .LBB23_27: # %else71
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bgez a3, .LBB23_29
+; RV64-NEXT: .LBB23_28: # %cond.store73
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1921
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a2, 0(a2)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: .LBB23_29: # %else74
+; RV64-NEXT: slli a3, a1, 37
+; RV64-NEXT: addi a2, sp, 1988
+; RV64-NEXT: bltz a3, .LBB23_92
+; RV64-NEXT: # %bb.30: # %else77
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bltz a3, .LBB23_93
+; RV64-NEXT: .LBB23_31: # %else80
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: bltz a3, .LBB23_94
+; RV64-NEXT: .LBB23_32: # %else83
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bltz a3, .LBB23_95
+; RV64-NEXT: .LBB23_33: # %else86
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bltz a3, .LBB23_96
+; RV64-NEXT: .LBB23_34: # %else89
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bltz a3, .LBB23_97
+; RV64-NEXT: .LBB23_35: # %else92
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bltz a3, .LBB23_98
+; RV64-NEXT: .LBB23_36: # %else95
+; RV64-NEXT: slli a3, a1, 30
+; RV64-NEXT: bltz a3, .LBB23_99
+; RV64-NEXT: .LBB23_37: # %else98
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bltz a3, .LBB23_100
+; RV64-NEXT: .LBB23_38: # %else101
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bltz a3, .LBB23_101
+; RV64-NEXT: .LBB23_39: # %else104
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bltz a3, .LBB23_102
+; RV64-NEXT: .LBB23_40: # %else107
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bltz a3, .LBB23_103
+; RV64-NEXT: .LBB23_41: # %else110
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bltz a3, .LBB23_104
+; RV64-NEXT: .LBB23_42: # %else113
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bltz a3, .LBB23_105
+; RV64-NEXT: .LBB23_43: # %else116
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bltz a3, .LBB23_106
+; RV64-NEXT: .LBB23_44: # %else119
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bltz a3, .LBB23_107
+; RV64-NEXT: .LBB23_45: # %else122
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bltz a3, .LBB23_108
+; RV64-NEXT: .LBB23_46: # %else125
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bltz a3, .LBB23_109
+; RV64-NEXT: .LBB23_47: # %else128
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bltz a3, .LBB23_110
+; RV64-NEXT: .LBB23_48: # %else131
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: bltz a3, .LBB23_111
+; RV64-NEXT: .LBB23_49: # %else134
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bltz a3, .LBB23_112
+; RV64-NEXT: .LBB23_50: # %else137
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bltz a3, .LBB23_113
+; RV64-NEXT: .LBB23_51: # %else140
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bltz a3, .LBB23_114
+; RV64-NEXT: .LBB23_52: # %else143
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bltz a3, .LBB23_115
+; RV64-NEXT: .LBB23_53: # %else146
+; RV64-NEXT: slli a2, a1, 13
+; RV64-NEXT: bltz a2, .LBB23_116
+; RV64-NEXT: .LBB23_54: # %else149
+; RV64-NEXT: slli a2, a1, 12
+; RV64-NEXT: bltz a2, .LBB23_117
+; RV64-NEXT: .LBB23_55: # %else152
+; RV64-NEXT: slli a2, a1, 11
+; RV64-NEXT: bltz a2, .LBB23_118
+; RV64-NEXT: .LBB23_56: # %else155
+; RV64-NEXT: slli a2, a1, 10
+; RV64-NEXT: bltz a2, .LBB23_119
+; RV64-NEXT: .LBB23_57: # %else158
+; RV64-NEXT: slli a2, a1, 9
+; RV64-NEXT: bltz a2, .LBB23_120
+; RV64-NEXT: .LBB23_58: # %else161
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: bltz a2, .LBB23_121
+; RV64-NEXT: .LBB23_59: # %else164
+; RV64-NEXT: slli a2, a1, 7
+; RV64-NEXT: bltz a2, .LBB23_122
+; RV64-NEXT: .LBB23_60: # %else167
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: bltz a2, .LBB23_123
+; RV64-NEXT: .LBB23_61: # %else170
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: bltz a2, .LBB23_124
+; RV64-NEXT: .LBB23_62: # %else173
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: bltz a2, .LBB23_125
+; RV64-NEXT: .LBB23_63: # %else176
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: bltz a2, .LBB23_126
+; RV64-NEXT: .LBB23_64: # %else179
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: bltz a2, .LBB23_127
+; RV64-NEXT: .LBB23_65: # %else182
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: bltz a2, .LBB23_128
+; RV64-NEXT: .LBB23_66: # %else185
+; RV64-NEXT: bgez a1, .LBB23_68
+; RV64-NEXT: .LBB23_67: # %cond.store187
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: addi a2, sp, 128
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a2)
+; RV64-NEXT: lw a1, 252(sp)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: .LBB23_68: # %else188
+; RV64-NEXT: li a0, 25
+; RV64-NEXT: slli a0, a0, 8
+; RV64-NEXT: sub sp, s0, a0
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, 272
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 2032
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB23_69: # %cond.store
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: beqz a2, .LBB23_2
+; RV64-NEXT: .LBB23_70: # %cond.store1
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 1
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: beqz a2, .LBB23_3
+; RV64-NEXT: .LBB23_71: # %cond.store4
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 2
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: beqz a2, .LBB23_4
+; RV64-NEXT: .LBB23_72: # %cond.store7
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 3
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 16
+; RV64-NEXT: beqz a2, .LBB23_5
+; RV64-NEXT: .LBB23_73: # %cond.store10
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 4
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 32
+; RV64-NEXT: beqz a2, .LBB23_6
+; RV64-NEXT: .LBB23_74: # %cond.store13
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 5
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 64
+; RV64-NEXT: beqz a2, .LBB23_7
+; RV64-NEXT: .LBB23_75: # %cond.store16
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 6
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a2, a1, 128
+; RV64-NEXT: bnez a2, .LBB23_8
+; RV64-NEXT: j .LBB23_9
+; RV64-NEXT: .LBB23_76: # %cond.store25
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1920
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1984(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: andi a3, a1, 1024
+; RV64-NEXT: beqz a3, .LBB23_13
+; RV64-NEXT: .LBB23_77: # %cond.store28
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1860(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 52
+; RV64-NEXT: bgez a3, .LBB23_14
+; RV64-NEXT: .LBB23_78: # %cond.store31
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1664
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1736(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 51
+; RV64-NEXT: bgez a3, .LBB23_15
+; RV64-NEXT: .LBB23_79: # %cond.store34
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1612(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 50
+; RV64-NEXT: bgez a3, .LBB23_16
+; RV64-NEXT: .LBB23_80: # %cond.store37
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1408
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1488(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 49
+; RV64-NEXT: bgez a3, .LBB23_17
+; RV64-NEXT: .LBB23_81: # %cond.store40
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 21
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1364(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 48
+; RV64-NEXT: bgez a3, .LBB23_18
+; RV64-NEXT: .LBB23_82: # %cond.store43
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 1152
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1240(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 47
+; RV64-NEXT: bgez a3, .LBB23_19
+; RV64-NEXT: .LBB23_83: # %cond.store46
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 5
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1116(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 46
+; RV64-NEXT: bgez a3, .LBB23_20
+; RV64-NEXT: .LBB23_84: # %cond.store49
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 992(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 45
+; RV64-NEXT: bgez a3, .LBB23_21
+; RV64-NEXT: .LBB23_85: # %cond.store52
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 19
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 868(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 44
+; RV64-NEXT: bgez a3, .LBB23_22
+; RV64-NEXT: .LBB23_86: # %cond.store55
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 640
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 744(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 43
+; RV64-NEXT: bgez a3, .LBB23_23
+; RV64-NEXT: .LBB23_87: # %cond.store58
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 9
+; RV64-NEXT: slli a4, a4, 9
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 620(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 42
+; RV64-NEXT: bgez a3, .LBB23_24
+; RV64-NEXT: .LBB23_88: # %cond.store61
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 496(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 41
+; RV64-NEXT: bgez a3, .LBB23_25
+; RV64-NEXT: .LBB23_89: # %cond.store64
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: li a4, 17
+; RV64-NEXT: slli a4, a4, 8
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 372(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 40
+; RV64-NEXT: bgez a3, .LBB23_26
+; RV64-NEXT: .LBB23_90: # %cond.store67
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: addiw a4, a4, 128
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 248(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 39
+; RV64-NEXT: bgez a3, .LBB23_27
+; RV64-NEXT: .LBB23_91: # %cond.store70
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: lui a4, 1
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 124(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 38
+; RV64-NEXT: bltz a3, .LBB23_28
+; RV64-NEXT: j .LBB23_29
+; RV64-NEXT: .LBB23_92: # %cond.store76
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1793
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1956(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 36
+; RV64-NEXT: bgez a3, .LBB23_31
+; RV64-NEXT: .LBB23_93: # %cond.store79
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1665
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1832(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 35
+; RV64-NEXT: bgez a3, .LBB23_32
+; RV64-NEXT: .LBB23_94: # %cond.store82
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1537
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1708(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 34
+; RV64-NEXT: bgez a3, .LBB23_33
+; RV64-NEXT: .LBB23_95: # %cond.store85
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1409
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1584(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 33
+; RV64-NEXT: bgez a3, .LBB23_34
+; RV64-NEXT: .LBB23_96: # %cond.store88
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1281
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1460(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: bgez a3, .LBB23_35
+; RV64-NEXT: .LBB23_97: # %cond.store91
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1153
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v8, (a4)
+; RV64-NEXT: lw a3, 1336(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 31
+; RV64-NEXT: bgez a3, .LBB23_36
+; RV64-NEXT: .LBB23_98: # %cond.store94
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vmv.x.s a3, v16
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 30
+; RV64-NEXT: bgez a3, .LBB23_37
+; RV64-NEXT: .LBB23_99: # %cond.store97
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 1
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 29
+; RV64-NEXT: bgez a3, .LBB23_38
+; RV64-NEXT: .LBB23_100: # %cond.store100
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 2
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 28
+; RV64-NEXT: bgez a3, .LBB23_39
+; RV64-NEXT: .LBB23_101: # %cond.store103
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 3
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 27
+; RV64-NEXT: bgez a3, .LBB23_40
+; RV64-NEXT: .LBB23_102: # %cond.store106
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 4
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 26
+; RV64-NEXT: bgez a3, .LBB23_41
+; RV64-NEXT: .LBB23_103: # %cond.store109
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 5
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 25
+; RV64-NEXT: bgez a3, .LBB23_42
+; RV64-NEXT: .LBB23_104: # %cond.store112
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 6
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 24
+; RV64-NEXT: bgez a3, .LBB23_43
+; RV64-NEXT: .LBB23_105: # %cond.store115
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 7
+; RV64-NEXT: vmv.x.s a3, v8
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 23
+; RV64-NEXT: bgez a3, .LBB23_44
+; RV64-NEXT: .LBB23_106: # %cond.store118
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1025
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 1116(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 22
+; RV64-NEXT: bgez a3, .LBB23_45
+; RV64-NEXT: .LBB23_107: # %cond.store121
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 897
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 992(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 21
+; RV64-NEXT: bgez a3, .LBB23_46
+; RV64-NEXT: .LBB23_108: # %cond.store124
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 769
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 868(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 20
+; RV64-NEXT: bgez a3, .LBB23_47
+; RV64-NEXT: .LBB23_109: # %cond.store127
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 641
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 744(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 19
+; RV64-NEXT: bgez a3, .LBB23_48
+; RV64-NEXT: .LBB23_110: # %cond.store130
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 513
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 620(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 18
+; RV64-NEXT: bgez a3, .LBB23_49
+; RV64-NEXT: .LBB23_111: # %cond.store133
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 385
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 496(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 17
+; RV64-NEXT: bgez a3, .LBB23_50
+; RV64-NEXT: .LBB23_112: # %cond.store136
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 257
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 372(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 16
+; RV64-NEXT: bgez a3, .LBB23_51
+; RV64-NEXT: .LBB23_113: # %cond.store139
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 129
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 248(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 15
+; RV64-NEXT: bgez a3, .LBB23_52
+; RV64-NEXT: .LBB23_114: # %cond.store142
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 2047
+; RV64-NEXT: addi a4, a4, 1
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a3, 124(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a3, a1, 14
+; RV64-NEXT: bgez a3, .LBB23_53
+; RV64-NEXT: .LBB23_115: # %cond.store145
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi a4, sp, 1920
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a4)
+; RV64-NEXT: lw a2, 0(a2)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 13
+; RV64-NEXT: bgez a2, .LBB23_54
+; RV64-NEXT: .LBB23_116: # %cond.store148
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1792
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1864(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 12
+; RV64-NEXT: bgez a2, .LBB23_55
+; RV64-NEXT: .LBB23_117: # %cond.store151
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1664
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1740(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 11
+; RV64-NEXT: bgez a2, .LBB23_56
+; RV64-NEXT: .LBB23_118: # %cond.store154
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1536
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1616(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 10
+; RV64-NEXT: bgez a2, .LBB23_57
+; RV64-NEXT: .LBB23_119: # %cond.store157
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1408
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1492(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 9
+; RV64-NEXT: bgez a2, .LBB23_58
+; RV64-NEXT: .LBB23_120: # %cond.store160
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1280
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1368(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: bgez a2, .LBB23_59
+; RV64-NEXT: .LBB23_121: # %cond.store163
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1152
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1244(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 7
+; RV64-NEXT: bgez a2, .LBB23_60
+; RV64-NEXT: .LBB23_122: # %cond.store166
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 1024
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 1120(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: bgez a2, .LBB23_61
+; RV64-NEXT: .LBB23_123: # %cond.store169
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 896
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 996(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: bgez a2, .LBB23_62
+; RV64-NEXT: .LBB23_124: # %cond.store172
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 768
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 872(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: bgez a2, .LBB23_63
+; RV64-NEXT: .LBB23_125: # %cond.store175
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 640
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 748(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: bgez a2, .LBB23_64
+; RV64-NEXT: .LBB23_126: # %cond.store178
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 512
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 624(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: bgez a2, .LBB23_65
+; RV64-NEXT: .LBB23_127: # %cond.store181
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 384
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 500(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: bgez a2, .LBB23_66
+; RV64-NEXT: .LBB23_128: # %cond.store184
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: addi a3, sp, 256
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: lw a2, 376(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 4
+; RV64-NEXT: bltz a1, .LBB23_67
+; RV64-NEXT: j .LBB23_68
+;
+; RV32-LABEL: test_compresstore_i32_v64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: andi a1, a2, 1
+; RV32-NEXT: bnez a1, .LBB23_72
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a1, a2, 2
+; RV32-NEXT: bnez a1, .LBB23_73
+; RV32-NEXT: .LBB23_2: # %else2
+; RV32-NEXT: andi a1, a2, 4
+; RV32-NEXT: bnez a1, .LBB23_74
+; RV32-NEXT: .LBB23_3: # %else5
+; RV32-NEXT: andi a1, a2, 8
+; RV32-NEXT: bnez a1, .LBB23_75
+; RV32-NEXT: .LBB23_4: # %else8
+; RV32-NEXT: andi a1, a2, 16
+; RV32-NEXT: bnez a1, .LBB23_76
+; RV32-NEXT: .LBB23_5: # %else11
+; RV32-NEXT: andi a1, a2, 32
+; RV32-NEXT: bnez a1, .LBB23_77
+; RV32-NEXT: .LBB23_6: # %else14
+; RV32-NEXT: andi a1, a2, 64
+; RV32-NEXT: bnez a1, .LBB23_78
+; RV32-NEXT: .LBB23_7: # %else17
+; RV32-NEXT: andi a1, a2, 128
+; RV32-NEXT: beqz a1, .LBB23_9
+; RV32-NEXT: .LBB23_8: # %cond.store19
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 7
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: .LBB23_9: # %else20
+; RV32-NEXT: addi sp, sp, -2032
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 2032
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: lui a1, 1
+; RV32-NEXT: addi a1, a1, 272
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: andi a1, a2, 256
+; RV32-NEXT: beqz a1, .LBB23_11
+; RV32-NEXT: # %bb.10: # %cond.store22
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: slli a3, a3, 11
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a3)
+; RV32-NEXT: lui a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: lw a1, -2016(a1)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: .LBB23_11: # %else23
+; RV32-NEXT: andi a3, a2, 512
+; RV32-NEXT: addi a1, sp, 2047
+; RV32-NEXT: addi a1, a1, 2021
+; RV32-NEXT: bnez a3, .LBB23_79
+; RV32-NEXT: # %bb.12: # %else26
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: bnez a3, .LBB23_80
+; RV32-NEXT: .LBB23_13: # %else29
+; RV32-NEXT: slli a3, a2, 20
+; RV32-NEXT: bltz a3, .LBB23_81
+; RV32-NEXT: .LBB23_14: # %else32
+; RV32-NEXT: slli a3, a2, 19
+; RV32-NEXT: bltz a3, .LBB23_82
+; RV32-NEXT: .LBB23_15: # %else35
+; RV32-NEXT: slli a3, a2, 18
+; RV32-NEXT: bltz a3, .LBB23_83
+; RV32-NEXT: .LBB23_16: # %else38
+; RV32-NEXT: slli a3, a2, 17
+; RV32-NEXT: bltz a3, .LBB23_84
+; RV32-NEXT: .LBB23_17: # %else41
+; RV32-NEXT: slli a3, a2, 16
+; RV32-NEXT: bltz a3, .LBB23_85
+; RV32-NEXT: .LBB23_18: # %else44
+; RV32-NEXT: slli a3, a2, 15
+; RV32-NEXT: bltz a3, .LBB23_86
+; RV32-NEXT: .LBB23_19: # %else47
+; RV32-NEXT: slli a3, a2, 14
+; RV32-NEXT: bltz a3, .LBB23_87
+; RV32-NEXT: .LBB23_20: # %else50
+; RV32-NEXT: slli a3, a2, 13
+; RV32-NEXT: bltz a3, .LBB23_88
+; RV32-NEXT: .LBB23_21: # %else53
+; RV32-NEXT: slli a3, a2, 12
+; RV32-NEXT: bltz a3, .LBB23_89
+; RV32-NEXT: .LBB23_22: # %else56
+; RV32-NEXT: slli a3, a2, 11
+; RV32-NEXT: bltz a3, .LBB23_90
+; RV32-NEXT: .LBB23_23: # %else59
+; RV32-NEXT: slli a3, a2, 10
+; RV32-NEXT: bltz a3, .LBB23_91
+; RV32-NEXT: .LBB23_24: # %else62
+; RV32-NEXT: slli a3, a2, 9
+; RV32-NEXT: bltz a3, .LBB23_92
+; RV32-NEXT: .LBB23_25: # %else65
+; RV32-NEXT: slli a3, a2, 8
+; RV32-NEXT: bltz a3, .LBB23_93
+; RV32-NEXT: .LBB23_26: # %else68
+; RV32-NEXT: slli a3, a2, 7
+; RV32-NEXT: bltz a3, .LBB23_94
+; RV32-NEXT: .LBB23_27: # %else71
+; RV32-NEXT: slli a3, a2, 6
+; RV32-NEXT: bgez a3, .LBB23_29
+; RV32-NEXT: .LBB23_28: # %cond.store73
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1921
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: .LBB23_29: # %else74
+; RV32-NEXT: slli a1, a2, 5
+; RV32-NEXT: addi a3, sp, 1988
+; RV32-NEXT: bltz a1, .LBB23_95
+; RV32-NEXT: # %bb.30: # %else77
+; RV32-NEXT: slli a1, a2, 4
+; RV32-NEXT: bltz a1, .LBB23_96
+; RV32-NEXT: .LBB23_31: # %else80
+; RV32-NEXT: slli a1, a2, 3
+; RV32-NEXT: bgez a1, .LBB23_33
+; RV32-NEXT: .LBB23_32: # %cond.store82
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1537
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a1, 1708(a3)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a4, a1, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a1, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: .LBB23_33: # %else83
+; RV32-NEXT: slli a4, a2, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: bgez a4, .LBB23_35
+; RV32-NEXT: # %bb.34: # %cond.store85
+; RV32-NEXT: li a4, 32
+; RV32-NEXT: addi a5, sp, 2047
+; RV32-NEXT: addi a5, a5, 1409
+; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a5)
+; RV32-NEXT: lw a4, 1584(a3)
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: srli a5, a4, 24
+; RV32-NEXT: sb a5, 3(a0)
+; RV32-NEXT: srli a5, a4, 16
+; RV32-NEXT: sb a5, 2(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: .LBB23_35: # %else86
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: slli a4, a2, 1
+; RV32-NEXT: vsrl.vx v24, v0, a1
+; RV32-NEXT: bgez a4, .LBB23_37
+; RV32-NEXT: # %bb.36: # %cond.store88
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1281
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a1, 1460(a3)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a4, a1, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a1, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: .LBB23_37: # %else89
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: bltz a2, .LBB23_97
+; RV32-NEXT: # %bb.38: # %else92
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: bnez a2, .LBB23_98
+; RV32-NEXT: .LBB23_39: # %else95
+; RV32-NEXT: andi a2, a1, 2
+; RV32-NEXT: bnez a2, .LBB23_99
+; RV32-NEXT: .LBB23_40: # %else98
+; RV32-NEXT: andi a2, a1, 4
+; RV32-NEXT: bnez a2, .LBB23_100
+; RV32-NEXT: .LBB23_41: # %else101
+; RV32-NEXT: andi a2, a1, 8
+; RV32-NEXT: bnez a2, .LBB23_101
+; RV32-NEXT: .LBB23_42: # %else104
+; RV32-NEXT: andi a2, a1, 16
+; RV32-NEXT: bnez a2, .LBB23_102
+; RV32-NEXT: .LBB23_43: # %else107
+; RV32-NEXT: andi a2, a1, 32
+; RV32-NEXT: bnez a2, .LBB23_103
+; RV32-NEXT: .LBB23_44: # %else110
+; RV32-NEXT: andi a2, a1, 64
+; RV32-NEXT: bnez a2, .LBB23_104
+; RV32-NEXT: .LBB23_45: # %else113
+; RV32-NEXT: andi a2, a1, 128
+; RV32-NEXT: bnez a2, .LBB23_105
+; RV32-NEXT: .LBB23_46: # %else116
+; RV32-NEXT: andi a2, a1, 256
+; RV32-NEXT: bnez a2, .LBB23_106
+; RV32-NEXT: .LBB23_47: # %else119
+; RV32-NEXT: andi a2, a1, 512
+; RV32-NEXT: bnez a2, .LBB23_107
+; RV32-NEXT: .LBB23_48: # %else122
+; RV32-NEXT: andi a2, a1, 1024
+; RV32-NEXT: bnez a2, .LBB23_108
+; RV32-NEXT: .LBB23_49: # %else125
+; RV32-NEXT: slli a2, a1, 20
+; RV32-NEXT: bltz a2, .LBB23_109
+; RV32-NEXT: .LBB23_50: # %else128
+; RV32-NEXT: slli a2, a1, 19
+; RV32-NEXT: bltz a2, .LBB23_110
+; RV32-NEXT: .LBB23_51: # %else131
+; RV32-NEXT: slli a2, a1, 18
+; RV32-NEXT: bltz a2, .LBB23_111
+; RV32-NEXT: .LBB23_52: # %else134
+; RV32-NEXT: slli a2, a1, 17
+; RV32-NEXT: bltz a2, .LBB23_112
+; RV32-NEXT: .LBB23_53: # %else137
+; RV32-NEXT: slli a2, a1, 16
+; RV32-NEXT: bltz a2, .LBB23_113
+; RV32-NEXT: .LBB23_54: # %else140
+; RV32-NEXT: slli a2, a1, 15
+; RV32-NEXT: bltz a2, .LBB23_114
+; RV32-NEXT: .LBB23_55: # %else143
+; RV32-NEXT: slli a2, a1, 14
+; RV32-NEXT: bltz a2, .LBB23_115
+; RV32-NEXT: .LBB23_56: # %else146
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bltz a2, .LBB23_116
+; RV32-NEXT: .LBB23_57: # %else149
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bltz a2, .LBB23_117
+; RV32-NEXT: .LBB23_58: # %else152
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bltz a2, .LBB23_118
+; RV32-NEXT: .LBB23_59: # %else155
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bltz a2, .LBB23_119
+; RV32-NEXT: .LBB23_60: # %else158
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bltz a2, .LBB23_120
+; RV32-NEXT: .LBB23_61: # %else161
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bltz a2, .LBB23_121
+; RV32-NEXT: .LBB23_62: # %else164
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bltz a2, .LBB23_122
+; RV32-NEXT: .LBB23_63: # %else167
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bltz a2, .LBB23_123
+; RV32-NEXT: .LBB23_64: # %else170
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bltz a2, .LBB23_124
+; RV32-NEXT: .LBB23_65: # %else173
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bltz a2, .LBB23_125
+; RV32-NEXT: .LBB23_66: # %else176
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bltz a2, .LBB23_126
+; RV32-NEXT: .LBB23_67: # %else179
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bltz a2, .LBB23_127
+; RV32-NEXT: .LBB23_68: # %else182
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bltz a2, .LBB23_128
+; RV32-NEXT: .LBB23_69: # %else185
+; RV32-NEXT: bgez a1, .LBB23_71
+; RV32-NEXT: .LBB23_70: # %cond.store187
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: addi a2, sp, 128
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a2)
+; RV32-NEXT: lw a1, 252(sp)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: .LBB23_71: # %else188
+; RV32-NEXT: li a0, 25
+; RV32-NEXT: slli a0, a0, 8
+; RV32-NEXT: sub sp, s0, a0
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, 272
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 2032
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB23_72: # %cond.store
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 2
+; RV32-NEXT: beqz a1, .LBB23_2
+; RV32-NEXT: .LBB23_73: # %cond.store1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 1
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 4
+; RV32-NEXT: beqz a1, .LBB23_3
+; RV32-NEXT: .LBB23_74: # %cond.store4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 2
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 8
+; RV32-NEXT: beqz a1, .LBB23_4
+; RV32-NEXT: .LBB23_75: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 3
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 16
+; RV32-NEXT: beqz a1, .LBB23_5
+; RV32-NEXT: .LBB23_76: # %cond.store10
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 4
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 32
+; RV32-NEXT: beqz a1, .LBB23_6
+; RV32-NEXT: .LBB23_77: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 5
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 64
+; RV32-NEXT: beqz a1, .LBB23_7
+; RV32-NEXT: .LBB23_78: # %cond.store16
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 6
+; RV32-NEXT: vmv.x.s a1, v24
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a1, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a1, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a1, a2, 128
+; RV32-NEXT: bnez a1, .LBB23_8
+; RV32-NEXT: j .LBB23_9
+; RV32-NEXT: .LBB23_79: # %cond.store25
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 1920
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1984(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a3, a2, 1024
+; RV32-NEXT: beqz a3, .LBB23_13
+; RV32-NEXT: .LBB23_80: # %cond.store28
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 23
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1860(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 20
+; RV32-NEXT: bgez a3, .LBB23_14
+; RV32-NEXT: .LBB23_81: # %cond.store31
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 1664
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1736(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 19
+; RV32-NEXT: bgez a3, .LBB23_15
+; RV32-NEXT: .LBB23_82: # %cond.store34
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 11
+; RV32-NEXT: slli a4, a4, 9
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1612(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 18
+; RV32-NEXT: bgez a3, .LBB23_16
+; RV32-NEXT: .LBB23_83: # %cond.store37
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 1408
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1488(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 17
+; RV32-NEXT: bgez a3, .LBB23_17
+; RV32-NEXT: .LBB23_84: # %cond.store40
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 21
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1364(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 16
+; RV32-NEXT: bgez a3, .LBB23_18
+; RV32-NEXT: .LBB23_85: # %cond.store43
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 1152
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1240(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 15
+; RV32-NEXT: bgez a3, .LBB23_19
+; RV32-NEXT: .LBB23_86: # %cond.store46
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: slli a4, a4, 10
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 1116(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 14
+; RV32-NEXT: bgez a3, .LBB23_20
+; RV32-NEXT: .LBB23_87: # %cond.store49
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 896
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 992(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 13
+; RV32-NEXT: bgez a3, .LBB23_21
+; RV32-NEXT: .LBB23_88: # %cond.store52
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 19
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 868(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 12
+; RV32-NEXT: bgez a3, .LBB23_22
+; RV32-NEXT: .LBB23_89: # %cond.store55
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 640
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 744(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 11
+; RV32-NEXT: bgez a3, .LBB23_23
+; RV32-NEXT: .LBB23_90: # %cond.store58
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 9
+; RV32-NEXT: slli a4, a4, 9
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 620(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 10
+; RV32-NEXT: bgez a3, .LBB23_24
+; RV32-NEXT: .LBB23_91: # %cond.store61
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 384
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 496(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 9
+; RV32-NEXT: bgez a3, .LBB23_25
+; RV32-NEXT: .LBB23_92: # %cond.store64
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: li a4, 17
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 372(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 8
+; RV32-NEXT: bgez a3, .LBB23_26
+; RV32-NEXT: .LBB23_93: # %cond.store67
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, 128
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 248(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 7
+; RV32-NEXT: bgez a3, .LBB23_27
+; RV32-NEXT: .LBB23_94: # %cond.store70
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a3, 124(a1)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a3, a2, 6
+; RV32-NEXT: bltz a3, .LBB23_28
+; RV32-NEXT: j .LBB23_29
+; RV32-NEXT: .LBB23_95: # %cond.store76
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1793
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a1, 1956(a3)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a4, a1, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a1, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a1, a2, 4
+; RV32-NEXT: bgez a1, .LBB23_31
+; RV32-NEXT: .LBB23_96: # %cond.store79
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1665
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a1, 1832(a3)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a4, a1, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a1, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a1, a2, 3
+; RV32-NEXT: bltz a1, .LBB23_32
+; RV32-NEXT: j .LBB23_33
+; RV32-NEXT: .LBB23_97: # %cond.store91
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1153
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v8, (a4)
+; RV32-NEXT: lw a2, 1336(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: beqz a2, .LBB23_39
+; RV32-NEXT: .LBB23_98: # %cond.store94
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vmv.x.s a2, v16
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 2
+; RV32-NEXT: beqz a2, .LBB23_40
+; RV32-NEXT: .LBB23_99: # %cond.store97
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 1
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 4
+; RV32-NEXT: beqz a2, .LBB23_41
+; RV32-NEXT: .LBB23_100: # %cond.store100
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 2
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 8
+; RV32-NEXT: beqz a2, .LBB23_42
+; RV32-NEXT: .LBB23_101: # %cond.store103
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 3
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 16
+; RV32-NEXT: beqz a2, .LBB23_43
+; RV32-NEXT: .LBB23_102: # %cond.store106
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 4
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 32
+; RV32-NEXT: beqz a2, .LBB23_44
+; RV32-NEXT: .LBB23_103: # %cond.store109
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 5
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 64
+; RV32-NEXT: beqz a2, .LBB23_45
+; RV32-NEXT: .LBB23_104: # %cond.store112
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 6
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 128
+; RV32-NEXT: beqz a2, .LBB23_46
+; RV32-NEXT: .LBB23_105: # %cond.store115
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 7
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 256
+; RV32-NEXT: beqz a2, .LBB23_47
+; RV32-NEXT: .LBB23_106: # %cond.store118
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1025
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 1116(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 512
+; RV32-NEXT: beqz a2, .LBB23_48
+; RV32-NEXT: .LBB23_107: # %cond.store121
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 897
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 992(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: andi a2, a1, 1024
+; RV32-NEXT: beqz a2, .LBB23_49
+; RV32-NEXT: .LBB23_108: # %cond.store124
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 769
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 868(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 20
+; RV32-NEXT: bgez a2, .LBB23_50
+; RV32-NEXT: .LBB23_109: # %cond.store127
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 641
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 744(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 19
+; RV32-NEXT: bgez a2, .LBB23_51
+; RV32-NEXT: .LBB23_110: # %cond.store130
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 513
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 620(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 18
+; RV32-NEXT: bgez a2, .LBB23_52
+; RV32-NEXT: .LBB23_111: # %cond.store133
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 385
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 496(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 17
+; RV32-NEXT: bgez a2, .LBB23_53
+; RV32-NEXT: .LBB23_112: # %cond.store136
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 257
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 372(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 16
+; RV32-NEXT: bgez a2, .LBB23_54
+; RV32-NEXT: .LBB23_113: # %cond.store139
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 129
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 248(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 15
+; RV32-NEXT: bgez a2, .LBB23_55
+; RV32-NEXT: .LBB23_114: # %cond.store142
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 2047
+; RV32-NEXT: addi a4, a4, 1
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 124(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a4, a2, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 14
+; RV32-NEXT: bgez a2, .LBB23_56
+; RV32-NEXT: .LBB23_115: # %cond.store145
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a4, sp, 1920
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a4)
+; RV32-NEXT: lw a2, 0(a3)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bgez a2, .LBB23_57
+; RV32-NEXT: .LBB23_116: # %cond.store148
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1792
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1864(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bgez a2, .LBB23_58
+; RV32-NEXT: .LBB23_117: # %cond.store151
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1664
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1740(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bgez a2, .LBB23_59
+; RV32-NEXT: .LBB23_118: # %cond.store154
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1536
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1616(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bgez a2, .LBB23_60
+; RV32-NEXT: .LBB23_119: # %cond.store157
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1408
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1492(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bgez a2, .LBB23_61
+; RV32-NEXT: .LBB23_120: # %cond.store160
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1280
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1368(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bgez a2, .LBB23_62
+; RV32-NEXT: .LBB23_121: # %cond.store163
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1152
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1244(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bgez a2, .LBB23_63
+; RV32-NEXT: .LBB23_122: # %cond.store166
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 1024
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 1120(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bgez a2, .LBB23_64
+; RV32-NEXT: .LBB23_123: # %cond.store169
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 896
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 996(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bgez a2, .LBB23_65
+; RV32-NEXT: .LBB23_124: # %cond.store172
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 768
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 872(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bgez a2, .LBB23_66
+; RV32-NEXT: .LBB23_125: # %cond.store175
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 640
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 748(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bgez a2, .LBB23_67
+; RV32-NEXT: .LBB23_126: # %cond.store178
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 512
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 624(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bgez a2, .LBB23_68
+; RV32-NEXT: .LBB23_127: # %cond.store181
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 384
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 500(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bgez a2, .LBB23_69
+; RV32-NEXT: .LBB23_128: # %cond.store184
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: addi a3, sp, 256
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: lw a2, 376(sp)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: bltz a1, .LBB23_70
+; RV32-NEXT: j .LBB23_71
+entry:
+ tail call void @llvm.masked.compressstore.v64i32(<64 x i32> %data, ptr %p, <64 x i1> %mask)
+ ret void
+}
+
+; Compress + store for i64 type
+
+define void @test_compresstore_i64_v1(ptr %p, <1 x i1> %mask, <1 x i64> %data) {
+; RV64-LABEL: test_compresstore_i64_v1:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i64_v1:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i64(<1 x i64> %data, ptr %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i64_v2(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
+; RV64-LABEL: test_compresstore_i64_v2:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i64_v2:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %data, ptr %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i64_v4(ptr %p, <4 x i1> %mask, <4 x i64> %data) {
+; RV64-LABEL: test_compresstore_i64_v4:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vse64.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i64_v4:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV32-NEXT: vse64.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %data, ptr %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i64_v8(ptr %p, <8 x i1> %mask, <8 x i64> %data) {
+; RV64-LABEL: test_compresstore_i64_v8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vse64.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i64_v8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV32-NEXT: vse64.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %data, ptr %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i64_v16(ptr %p, <16 x i1> %mask, <16 x i64> %data) {
+; RV64-LABEL: test_compresstore_i64_v16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i64_v16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vse64.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i64(<16 x i64> %data, ptr %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data) {
+; RV64-LABEL: test_compresstore_i64_v32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vmv.x.s a1, v0
+; RV64-NEXT: andi a2, a1, 1
+; RV64-NEXT: bnez a2, .LBB29_36
+; RV64-NEXT: # %bb.1: # %else
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: bnez a2, .LBB29_37
+; RV64-NEXT: .LBB29_2: # %else2
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: bnez a2, .LBB29_38
+; RV64-NEXT: .LBB29_3: # %else5
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: beqz a2, .LBB29_5
+; RV64-NEXT: .LBB29_4: # %cond.store7
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 3
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: .LBB29_5: # %else8
+; RV64-NEXT: addi sp, sp, -2032
+; RV64-NEXT: .cfi_def_cfa_offset 2032
+; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 2032
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: addi sp, sp, -1296
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: andi a3, a1, 16
+; RV64-NEXT: addi a2, sp, 2024
+; RV64-NEXT: bnez a3, .LBB29_39
+; RV64-NEXT: # %bb.6: # %else11
+; RV64-NEXT: andi a3, a1, 32
+; RV64-NEXT: bnez a3, .LBB29_40
+; RV64-NEXT: .LBB29_7: # %else14
+; RV64-NEXT: andi a3, a1, 64
+; RV64-NEXT: bnez a3, .LBB29_41
+; RV64-NEXT: .LBB29_8: # %else17
+; RV64-NEXT: andi a3, a1, 128
+; RV64-NEXT: bnez a3, .LBB29_42
+; RV64-NEXT: .LBB29_9: # %else20
+; RV64-NEXT: andi a3, a1, 256
+; RV64-NEXT: bnez a3, .LBB29_43
+; RV64-NEXT: .LBB29_10: # %else23
+; RV64-NEXT: andi a3, a1, 512
+; RV64-NEXT: bnez a3, .LBB29_44
+; RV64-NEXT: .LBB29_11: # %else26
+; RV64-NEXT: andi a3, a1, 1024
+; RV64-NEXT: bnez a3, .LBB29_45
+; RV64-NEXT: .LBB29_12: # %else29
+; RV64-NEXT: slli a3, a1, 52
+; RV64-NEXT: bltz a3, .LBB29_46
+; RV64-NEXT: .LBB29_13: # %else32
+; RV64-NEXT: slli a3, a1, 51
+; RV64-NEXT: bltz a3, .LBB29_47
+; RV64-NEXT: .LBB29_14: # %else35
+; RV64-NEXT: slli a3, a1, 50
+; RV64-NEXT: bltz a3, .LBB29_48
+; RV64-NEXT: .LBB29_15: # %else38
+; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: bltz a2, .LBB29_49
+; RV64-NEXT: .LBB29_16: # %else41
+; RV64-NEXT: slli a2, a1, 48
+; RV64-NEXT: bltz a2, .LBB29_50
+; RV64-NEXT: .LBB29_17: # %else44
+; RV64-NEXT: slli a2, a1, 47
+; RV64-NEXT: bltz a2, .LBB29_51
+; RV64-NEXT: .LBB29_18: # %else47
+; RV64-NEXT: slli a2, a1, 46
+; RV64-NEXT: bltz a2, .LBB29_52
+; RV64-NEXT: .LBB29_19: # %else50
+; RV64-NEXT: slli a2, a1, 45
+; RV64-NEXT: bltz a2, .LBB29_53
+; RV64-NEXT: .LBB29_20: # %else53
+; RV64-NEXT: slli a2, a1, 44
+; RV64-NEXT: bltz a2, .LBB29_54
+; RV64-NEXT: .LBB29_21: # %else56
+; RV64-NEXT: slli a2, a1, 43
+; RV64-NEXT: bltz a2, .LBB29_55
+; RV64-NEXT: .LBB29_22: # %else59
+; RV64-NEXT: slli a2, a1, 42
+; RV64-NEXT: bltz a2, .LBB29_56
+; RV64-NEXT: .LBB29_23: # %else62
+; RV64-NEXT: slli a2, a1, 41
+; RV64-NEXT: bltz a2, .LBB29_57
+; RV64-NEXT: .LBB29_24: # %else65
+; RV64-NEXT: slli a2, a1, 40
+; RV64-NEXT: bltz a2, .LBB29_58
+; RV64-NEXT: .LBB29_25: # %else68
+; RV64-NEXT: slli a2, a1, 39
+; RV64-NEXT: bltz a2, .LBB29_59
+; RV64-NEXT: .LBB29_26: # %else71
+; RV64-NEXT: slli a2, a1, 38
+; RV64-NEXT: bltz a2, .LBB29_60
+; RV64-NEXT: .LBB29_27: # %else74
+; RV64-NEXT: slli a2, a1, 37
+; RV64-NEXT: bltz a2, .LBB29_61
+; RV64-NEXT: .LBB29_28: # %else77
+; RV64-NEXT: slli a2, a1, 36
+; RV64-NEXT: bltz a2, .LBB29_62
+; RV64-NEXT: .LBB29_29: # %else80
+; RV64-NEXT: slli a2, a1, 35
+; RV64-NEXT: bltz a2, .LBB29_63
+; RV64-NEXT: .LBB29_30: # %else83
+; RV64-NEXT: slli a2, a1, 34
+; RV64-NEXT: bltz a2, .LBB29_64
+; RV64-NEXT: .LBB29_31: # %else86
+; RV64-NEXT: slli a2, a1, 33
+; RV64-NEXT: bgez a2, .LBB29_33
+; RV64-NEXT: .LBB29_32: # %cond.store88
+; RV64-NEXT: addi a2, sp, 256
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 368(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: .LBB29_33: # %else89
+; RV64-NEXT: lui a2, 524288
+; RV64-NEXT: and a1, a1, a2
+; RV64-NEXT: beqz a1, .LBB29_35
+; RV64-NEXT: # %bb.34: # %cond.store91
+; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a1)
+; RV64-NEXT: ld a1, 248(sp)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 56
+; RV64-NEXT: sb a2, 7(a0)
+; RV64-NEXT: srli a2, a1, 48
+; RV64-NEXT: sb a2, 6(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: .LBB29_35: # %else92
+; RV64-NEXT: addi sp, s0, -2048
+; RV64-NEXT: addi sp, sp, -1280
+; RV64-NEXT: addi sp, sp, 1296
+; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 2032
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB29_36: # %cond.store
+; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a2, a1, 2
+; RV64-NEXT: beqz a2, .LBB29_2
+; RV64-NEXT: .LBB29_37: # %cond.store1
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 1
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a2, a1, 4
+; RV64-NEXT: beqz a2, .LBB29_3
+; RV64-NEXT: .LBB29_38: # %cond.store4
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v8, 2
+; RV64-NEXT: vmv.x.s a2, v24
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a2, a1, 8
+; RV64-NEXT: bnez a2, .LBB29_4
+; RV64-NEXT: j .LBB29_5
+; RV64-NEXT: .LBB29_39: # %cond.store10
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 1025
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 1080(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a3, a1, 32
+; RV64-NEXT: beqz a3, .LBB29_7
+; RV64-NEXT: .LBB29_40: # %cond.store13
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 897
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 960(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a3, a1, 64
+; RV64-NEXT: beqz a3, .LBB29_8
+; RV64-NEXT: .LBB29_41: # %cond.store16
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 769
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 840(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a3, a1, 128
+; RV64-NEXT: beqz a3, .LBB29_9
+; RV64-NEXT: .LBB29_42: # %cond.store19
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 641
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 720(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a3, a1, 256
+; RV64-NEXT: beqz a3, .LBB29_10
+; RV64-NEXT: .LBB29_43: # %cond.store22
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 513
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 600(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a3, a1, 512
+; RV64-NEXT: beqz a3, .LBB29_11
+; RV64-NEXT: .LBB29_44: # %cond.store25
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 385
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 480(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: andi a3, a1, 1024
+; RV64-NEXT: beqz a3, .LBB29_12
+; RV64-NEXT: .LBB29_45: # %cond.store28
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 257
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 360(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a3, a1, 52
+; RV64-NEXT: bgez a3, .LBB29_13
+; RV64-NEXT: .LBB29_46: # %cond.store31
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 129
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 240(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a3, a1, 51
+; RV64-NEXT: bgez a3, .LBB29_14
+; RV64-NEXT: .LBB29_47: # %cond.store34
+; RV64-NEXT: addi a3, sp, 2047
+; RV64-NEXT: addi a3, a3, 1
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a3, 120(a2)
+; RV64-NEXT: sb a3, 0(a0)
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: sb a4, 7(a0)
+; RV64-NEXT: srli a4, a3, 48
+; RV64-NEXT: sb a4, 6(a0)
+; RV64-NEXT: srli a4, a3, 40
+; RV64-NEXT: sb a4, 5(a0)
+; RV64-NEXT: srli a4, a3, 32
+; RV64-NEXT: sb a4, 4(a0)
+; RV64-NEXT: srli a4, a3, 24
+; RV64-NEXT: sb a4, 3(a0)
+; RV64-NEXT: srli a4, a3, 16
+; RV64-NEXT: sb a4, 2(a0)
+; RV64-NEXT: srli a3, a3, 8
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a3, a1, 50
+; RV64-NEXT: bgez a3, .LBB29_15
+; RV64-NEXT: .LBB29_48: # %cond.store37
+; RV64-NEXT: addi a3, sp, 1920
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a3)
+; RV64-NEXT: ld a2, 0(a2)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: bgez a2, .LBB29_16
+; RV64-NEXT: .LBB29_49: # %cond.store40
+; RV64-NEXT: addi a2, sp, 1792
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a2)
+; RV64-NEXT: ld a2, 1904(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 48
+; RV64-NEXT: bgez a2, .LBB29_17
+; RV64-NEXT: .LBB29_50: # %cond.store43
+; RV64-NEXT: addi a2, sp, 1664
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v8, (a2)
+; RV64-NEXT: ld a2, 1784(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 47
+; RV64-NEXT: bgez a2, .LBB29_18
+; RV64-NEXT: .LBB29_51: # %cond.store46
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a2, v16
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 46
+; RV64-NEXT: bgez a2, .LBB29_19
+; RV64-NEXT: .LBB29_52: # %cond.store49
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 1
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 45
+; RV64-NEXT: bgez a2, .LBB29_20
+; RV64-NEXT: .LBB29_53: # %cond.store52
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 2
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 44
+; RV64-NEXT: bgez a2, .LBB29_21
+; RV64-NEXT: .LBB29_54: # %cond.store55
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v16, 3
+; RV64-NEXT: vmv.x.s a2, v8
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 43
+; RV64-NEXT: bgez a2, .LBB29_22
+; RV64-NEXT: .LBB29_55: # %cond.store58
+; RV64-NEXT: addi a2, sp, 1536
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 1568(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 42
+; RV64-NEXT: bgez a2, .LBB29_23
+; RV64-NEXT: .LBB29_56: # %cond.store61
+; RV64-NEXT: addi a2, sp, 1408
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 1448(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 41
+; RV64-NEXT: bgez a2, .LBB29_24
+; RV64-NEXT: .LBB29_57: # %cond.store64
+; RV64-NEXT: addi a2, sp, 1280
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 1328(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 40
+; RV64-NEXT: bgez a2, .LBB29_25
+; RV64-NEXT: .LBB29_58: # %cond.store67
+; RV64-NEXT: addi a2, sp, 1152
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 1208(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 39
+; RV64-NEXT: bgez a2, .LBB29_26
+; RV64-NEXT: .LBB29_59: # %cond.store70
+; RV64-NEXT: addi a2, sp, 1024
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 1088(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 38
+; RV64-NEXT: bgez a2, .LBB29_27
+; RV64-NEXT: .LBB29_60: # %cond.store73
+; RV64-NEXT: addi a2, sp, 896
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 968(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 37
+; RV64-NEXT: bgez a2, .LBB29_28
+; RV64-NEXT: .LBB29_61: # %cond.store76
+; RV64-NEXT: addi a2, sp, 768
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 848(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 36
+; RV64-NEXT: bgez a2, .LBB29_29
+; RV64-NEXT: .LBB29_62: # %cond.store79
+; RV64-NEXT: addi a2, sp, 640
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 728(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 35
+; RV64-NEXT: bgez a2, .LBB29_30
+; RV64-NEXT: .LBB29_63: # %cond.store82
+; RV64-NEXT: addi a2, sp, 512
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 608(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 34
+; RV64-NEXT: bgez a2, .LBB29_31
+; RV64-NEXT: .LBB29_64: # %cond.store85
+; RV64-NEXT: addi a2, sp, 384
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vse64.v v16, (a2)
+; RV64-NEXT: ld a2, 488(sp)
+; RV64-NEXT: sb a2, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 7(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 6(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 5(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 4(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 3(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 2(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: slli a2, a1, 33
+; RV64-NEXT: bltz a2, .LBB29_32
+; RV64-NEXT: j .LBB29_33
+;
+; RV32-LABEL: test_compresstore_i64_v32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: bnez a2, .LBB29_33
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a2, a1, 2
+; RV32-NEXT: bnez a2, .LBB29_34
+; RV32-NEXT: .LBB29_2: # %else2
+; RV32-NEXT: andi a2, a1, 4
+; RV32-NEXT: bnez a2, .LBB29_35
+; RV32-NEXT: .LBB29_3: # %else5
+; RV32-NEXT: andi a2, a1, 8
+; RV32-NEXT: bnez a2, .LBB29_36
+; RV32-NEXT: .LBB29_4: # %else8
+; RV32-NEXT: andi a2, a1, 16
+; RV32-NEXT: bnez a2, .LBB29_37
+; RV32-NEXT: .LBB29_5: # %else11
+; RV32-NEXT: andi a2, a1, 32
+; RV32-NEXT: bnez a2, .LBB29_38
+; RV32-NEXT: .LBB29_6: # %else14
+; RV32-NEXT: andi a2, a1, 64
+; RV32-NEXT: bnez a2, .LBB29_39
+; RV32-NEXT: .LBB29_7: # %else17
+; RV32-NEXT: andi a2, a1, 128
+; RV32-NEXT: bnez a2, .LBB29_40
+; RV32-NEXT: .LBB29_8: # %else20
+; RV32-NEXT: andi a2, a1, 256
+; RV32-NEXT: bnez a2, .LBB29_41
+; RV32-NEXT: .LBB29_9: # %else23
+; RV32-NEXT: andi a2, a1, 512
+; RV32-NEXT: bnez a2, .LBB29_42
+; RV32-NEXT: .LBB29_10: # %else26
+; RV32-NEXT: andi a2, a1, 1024
+; RV32-NEXT: bnez a2, .LBB29_43
+; RV32-NEXT: .LBB29_11: # %else29
+; RV32-NEXT: slli a2, a1, 20
+; RV32-NEXT: bltz a2, .LBB29_44
+; RV32-NEXT: .LBB29_12: # %else32
+; RV32-NEXT: slli a2, a1, 19
+; RV32-NEXT: bltz a2, .LBB29_45
+; RV32-NEXT: .LBB29_13: # %else35
+; RV32-NEXT: slli a2, a1, 18
+; RV32-NEXT: bltz a2, .LBB29_46
+; RV32-NEXT: .LBB29_14: # %else38
+; RV32-NEXT: slli a2, a1, 17
+; RV32-NEXT: bltz a2, .LBB29_47
+; RV32-NEXT: .LBB29_15: # %else41
+; RV32-NEXT: slli a2, a1, 16
+; RV32-NEXT: bltz a2, .LBB29_48
+; RV32-NEXT: .LBB29_16: # %else44
+; RV32-NEXT: slli a2, a1, 15
+; RV32-NEXT: bltz a2, .LBB29_49
+; RV32-NEXT: .LBB29_17: # %else47
+; RV32-NEXT: slli a2, a1, 14
+; RV32-NEXT: bltz a2, .LBB29_50
+; RV32-NEXT: .LBB29_18: # %else50
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bltz a2, .LBB29_51
+; RV32-NEXT: .LBB29_19: # %else53
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bltz a2, .LBB29_52
+; RV32-NEXT: .LBB29_20: # %else56
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bltz a2, .LBB29_53
+; RV32-NEXT: .LBB29_21: # %else59
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bltz a2, .LBB29_54
+; RV32-NEXT: .LBB29_22: # %else62
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bltz a2, .LBB29_55
+; RV32-NEXT: .LBB29_23: # %else65
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bltz a2, .LBB29_56
+; RV32-NEXT: .LBB29_24: # %else68
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bltz a2, .LBB29_57
+; RV32-NEXT: .LBB29_25: # %else71
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bltz a2, .LBB29_58
+; RV32-NEXT: .LBB29_26: # %else74
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bltz a2, .LBB29_59
+; RV32-NEXT: .LBB29_27: # %else77
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bltz a2, .LBB29_60
+; RV32-NEXT: .LBB29_28: # %else80
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bltz a2, .LBB29_61
+; RV32-NEXT: .LBB29_29: # %else83
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bltz a2, .LBB29_62
+; RV32-NEXT: .LBB29_30: # %else86
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bltz a2, .LBB29_63
+; RV32-NEXT: .LBB29_31: # %else89
+; RV32-NEXT: bltz a1, .LBB29_64
+; RV32-NEXT: .LBB29_32: # %else92
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB29_33: # %cond.store
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 2
+; RV32-NEXT: beqz a2, .LBB29_2
+; RV32-NEXT: .LBB29_34: # %cond.store1
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 1
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 4
+; RV32-NEXT: beqz a2, .LBB29_3
+; RV32-NEXT: .LBB29_35: # %cond.store4
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 2
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 8
+; RV32-NEXT: beqz a2, .LBB29_4
+; RV32-NEXT: .LBB29_36: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 3
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 16
+; RV32-NEXT: beqz a2, .LBB29_5
+; RV32-NEXT: .LBB29_37: # %cond.store10
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 4
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 32
+; RV32-NEXT: beqz a2, .LBB29_6
+; RV32-NEXT: .LBB29_38: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 5
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 64
+; RV32-NEXT: beqz a2, .LBB29_7
+; RV32-NEXT: .LBB29_39: # %cond.store16
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 6
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 128
+; RV32-NEXT: beqz a2, .LBB29_8
+; RV32-NEXT: .LBB29_40: # %cond.store19
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 7
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 256
+; RV32-NEXT: beqz a2, .LBB29_9
+; RV32-NEXT: .LBB29_41: # %cond.store22
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 8
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 512
+; RV32-NEXT: beqz a2, .LBB29_10
+; RV32-NEXT: .LBB29_42: # %cond.store25
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 9
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: andi a2, a1, 1024
+; RV32-NEXT: beqz a2, .LBB29_11
+; RV32-NEXT: .LBB29_43: # %cond.store28
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 10
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 20
+; RV32-NEXT: bgez a2, .LBB29_12
+; RV32-NEXT: .LBB29_44: # %cond.store31
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 11
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 19
+; RV32-NEXT: bgez a2, .LBB29_13
+; RV32-NEXT: .LBB29_45: # %cond.store34
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 12
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 18
+; RV32-NEXT: bgez a2, .LBB29_14
+; RV32-NEXT: .LBB29_46: # %cond.store37
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 13
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 17
+; RV32-NEXT: bgez a2, .LBB29_15
+; RV32-NEXT: .LBB29_47: # %cond.store40
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v8, 14
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v0, v24, a2
+; RV32-NEXT: vmv.x.s a2, v0
+; RV32-NEXT: vmv.x.s a3, v24
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 16
+; RV32-NEXT: bgez a2, .LBB29_16
+; RV32-NEXT: .LBB29_48: # %cond.store43
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 15
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 15
+; RV32-NEXT: bgez a2, .LBB29_17
+; RV32-NEXT: .LBB29_49: # %cond.store46
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vx v8, v16, a2
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: vmv.x.s a3, v16
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 14
+; RV32-NEXT: bgez a2, .LBB29_18
+; RV32-NEXT: .LBB29_50: # %cond.store49
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 1
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 13
+; RV32-NEXT: bgez a2, .LBB29_19
+; RV32-NEXT: .LBB29_51: # %cond.store52
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 2
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 12
+; RV32-NEXT: bgez a2, .LBB29_20
+; RV32-NEXT: .LBB29_52: # %cond.store55
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 3
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 11
+; RV32-NEXT: bgez a2, .LBB29_21
+; RV32-NEXT: .LBB29_53: # %cond.store58
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 4
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 10
+; RV32-NEXT: bgez a2, .LBB29_22
+; RV32-NEXT: .LBB29_54: # %cond.store61
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 5
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 9
+; RV32-NEXT: bgez a2, .LBB29_23
+; RV32-NEXT: .LBB29_55: # %cond.store64
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 6
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: bgez a2, .LBB29_24
+; RV32-NEXT: .LBB29_56: # %cond.store67
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 7
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 7
+; RV32-NEXT: bgez a2, .LBB29_25
+; RV32-NEXT: .LBB29_57: # %cond.store70
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 8
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 6
+; RV32-NEXT: bgez a2, .LBB29_26
+; RV32-NEXT: .LBB29_58: # %cond.store73
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 9
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: bgez a2, .LBB29_27
+; RV32-NEXT: .LBB29_59: # %cond.store76
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 10
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: bgez a2, .LBB29_28
+; RV32-NEXT: .LBB29_60: # %cond.store79
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 11
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: bgez a2, .LBB29_29
+; RV32-NEXT: .LBB29_61: # %cond.store82
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 12
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: bgez a2, .LBB29_30
+; RV32-NEXT: .LBB29_62: # %cond.store85
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 13
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: bgez a2, .LBB29_31
+; RV32-NEXT: .LBB29_63: # %cond.store88
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 14
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vmv.x.s a2, v24
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sb a4, 3(a0)
+; RV32-NEXT: srli a4, a3, 16
+; RV32-NEXT: sb a4, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: addi a0, a0, 8
+; RV32-NEXT: bgez a1, .LBB29_32
+; RV32-NEXT: .LBB29_64: # %cond.store91
+; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v16, 15
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v16, v8, a1
+; RV32-NEXT: vmv.x.s a1, v16
+; RV32-NEXT: vmv.x.s a2, v8
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: sb a1, 4(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 3(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 7(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 6(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr %p, <32 x i1> %mask)
+ ret void
+}
+
+declare void @llvm.masked.compressstore.v1i8(<1 x i8>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i8(<2 x i8>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i8(<4 x i8>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v128i8(<128 x i8>, ptr, <128 x i1>)
+declare void @llvm.masked.compressstore.v256i8(<256 x i8>, ptr, <256 x i1>)
+
+declare void @llvm.masked.compressstore.v1i16(<1 x i16>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i16(<2 x i16>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i16(<64 x i16>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v128i16(<128 x i16>, ptr, <128 x i1>)
+
+declare void @llvm.masked.compressstore.v1i32(<1 x i32>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i32(<32 x i32>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i32(<64 x i32>, ptr, <64 x i1>)
+
+declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i64(<16 x i64>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i64(<32 x i64>, ptr, <32 x i1>)
+
+;define void @test_compresstore_iYYYY_vXXXX(ptr %p, <XXXX x i1> %mask, <XXXX x iYYYY> %data) {
+;entry:
+; tail call void @llvm.masked.compressstore.vXXXXiYYYY(<XXXX x iYYYY> %data, ptr %p, <XXXX x i1> %mask)
+; ret void
+;}
+;declare void @llvm.masked.compressstore.vXXXXiYYYY(<XXXX x iYYYY>, ptr, <XXXX x i1>)
>From 9354dad46b7c71cef9c1c5d26230e452bf2cbc98 Mon Sep 17 00:00:00 2001
From: Kolya Panchenko <kolya.panchenko at sifive.com>
Date: Thu, 29 Feb 2024 13:22:01 -0800
Subject: [PATCH 2/5] Added alignment argument to TTI for compress/expand
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 20 +++++-
.../Target/RISCV/RISCVTargetTransformInfo.h | 2 +-
llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 67 +++++++++----------
3 files changed, 50 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 60bada663957bb..afec82c3333b32 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1621,9 +1621,27 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
-bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
+bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
auto *VTy = dyn_cast<VectorType>(DataTy);
if (!VTy || VTy->isScalableTy() || !ST->hasVInstructions())
return false;
+
+ Type *ScalarTy = VTy->getScalarType();
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ switch (ScalarTy->getIntegerBitWidth()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ break;
+ default:
+ return false;
+ }
+
return getRegUsageForType(VTy) <= 8;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6433027cce0e27..8daf6845dc8bc9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -261,7 +261,7 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
}
- bool isLegalMaskedCompressStore(Type *DataTy);
+ bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment);
bool isVScaleKnownToBeAPowerOfTwo() const {
return TLI->isVScaleKnownToBeAPowerOfTwo();
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
index f227f39740a003..5739b98a0d7247 100644
--- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -23,7 +23,7 @@ define void @test_compresstore_i8_v1(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
; RV32-NEXT: vse8.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v1i8(<1 x i8> %data, ptr %p, <1 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v1i8(<1 x i8> %data, ptr align 1 %p, <1 x i1> %mask)
ret void
}
@@ -46,7 +46,7 @@ define void @test_compresstore_i8_v2(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
; RV32-NEXT: vse8.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v2i8(<2 x i8> %data, ptr %p, <2 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v2i8(<2 x i8> %data, ptr align 1 %p, <2 x i1> %mask)
ret void
}
@@ -69,7 +69,7 @@ define void @test_compresstore_i8_v4(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
; RV32-NEXT: vse8.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v4i8(<4 x i8> %data, ptr %p, <4 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v4i8(<4 x i8> %data, ptr align 1 %p, <4 x i1> %mask)
ret void
}
@@ -92,7 +92,7 @@ define void @test_compresstore_i8_v8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
; RV32-NEXT: vse8.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v8i8(<8 x i8> %data, ptr %p, <8 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v8i8(<8 x i8> %data, ptr align 1 %p, <8 x i1> %mask)
ret void
}
@@ -115,7 +115,7 @@ define void @test_compresstore_i8_v16(ptr %p, <16 x i1> %mask, <16 x i8> %data)
; RV32-NEXT: vse8.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %p, <16 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr align 1 %p, <16 x i1> %mask)
ret void
}
@@ -140,7 +140,7 @@ define void @test_compresstore_i8_v32(ptr %p, <32 x i1> %mask, <32 x i8> %data)
; RV32-NEXT: vse8.v v10, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %p, <32 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr align 1 %p, <32 x i1> %mask)
ret void
}
@@ -165,7 +165,7 @@ define void @test_compresstore_i8_v64(ptr %p, <64 x i1> %mask, <64 x i8> %data)
; RV32-NEXT: vse8.v v12, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %p, <64 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr align 1 %p, <64 x i1> %mask)
ret void
}
@@ -190,7 +190,7 @@ define void @test_compresstore_i8_v128(ptr %p, <128 x i1> %mask, <128 x i8> %dat
; RV32-NEXT: vse8.v v16, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v128i8(<128 x i8> %data, ptr %p, <128 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v128i8(<128 x i8> %data, ptr align 1 %p, <128 x i1> %mask)
ret void
}
@@ -8604,7 +8604,7 @@ define void @test_compresstore_i8_v256(ptr %p, <256 x i1> %mask, <256 x i8> %dat
; RV32-NEXT: .LBB8_526: # %cond.store760
; RV32-NEXT: j .LBB8_283
entry:
- tail call void @llvm.masked.compressstore.v256i8(<256 x i8> %data, ptr %p, <256 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v256i8(<256 x i8> %data, ptr align 1 %p, <256 x i1> %mask)
ret void
}
@@ -8629,7 +8629,7 @@ define void @test_compresstore_i16_v1(ptr %p, <1 x i1> %mask, <1 x i16> %data) {
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v1i16(<1 x i16> %data, ptr %p, <1 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v1i16(<1 x i16> %data, ptr align 2 %p, <1 x i1> %mask)
ret void
}
@@ -8652,7 +8652,7 @@ define void @test_compresstore_i16_v2(ptr %p, <2 x i1> %mask, <2 x i16> %data) {
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v2i16(<2 x i16> %data, ptr %p, <2 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v2i16(<2 x i16> %data, ptr align 2 %p, <2 x i1> %mask)
ret void
}
@@ -8675,7 +8675,7 @@ define void @test_compresstore_i16_v4(ptr %p, <4 x i1> %mask, <4 x i16> %data) {
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v4i16(<4 x i16> %data, ptr %p, <4 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v4i16(<4 x i16> %data, ptr align 2 %p, <4 x i1> %mask)
ret void
}
@@ -8698,7 +8698,7 @@ define void @test_compresstore_i16_v8(ptr %p, <8 x i1> %mask, <8 x i16> %data) {
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %p, <8 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr align 2 %p, <8 x i1> %mask)
ret void
}
@@ -8721,7 +8721,7 @@ define void @test_compresstore_i16_v16(ptr %p, <16 x i1> %mask, <16 x i16> %data
; RV32-NEXT: vse16.v v10, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %p, <16 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr align 2 %p, <16 x i1> %mask)
ret void
}
@@ -8746,7 +8746,7 @@ define void @test_compresstore_i16_v32(ptr %p, <32 x i1> %mask, <32 x i16> %data
; RV32-NEXT: vse16.v v12, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr %p, <32 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr align 2 %p, <32 x i1> %mask)
ret void
}
@@ -8771,7 +8771,7 @@ define void @test_compresstore_i16_v64(ptr %p, <64 x i1> %mask, <64 x i16> %data
; RV32-NEXT: vse16.v v16, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v64i16(<64 x i16> %data, ptr %p, <64 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v64i16(<64 x i16> %data, ptr align 2 %p, <64 x i1> %mask)
ret void
}
@@ -13007,7 +13007,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: .LBB16_258: # %cond.store376
; RV32-NEXT: j .LBB16_141
entry:
- tail call void @llvm.masked.compressstore.v128i16(<128 x i16> %data, ptr %p, <128 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v128i16(<128 x i16> %data, ptr align 2 %p, <128 x i1> %mask)
ret void
}
@@ -13032,7 +13032,7 @@ define void @test_compresstore_i32_v1(ptr %p, <1 x i1> %mask, <1 x i32> %data) {
; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v1i32(<1 x i32> %data, ptr %p, <1 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v1i32(<1 x i32> %data, ptr align 4 %p, <1 x i1> %mask)
ret void
}
@@ -13055,7 +13055,7 @@ define void @test_compresstore_i32_v2(ptr %p, <2 x i1> %mask, <2 x i32> %data) {
; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v2i32(<2 x i32> %data, ptr %p, <2 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v2i32(<2 x i32> %data, ptr align 4 %p, <2 x i1> %mask)
ret void
}
@@ -13078,7 +13078,7 @@ define void @test_compresstore_i32_v4(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %data, ptr %p, <4 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %data, ptr align 4 %p, <4 x i1> %mask)
ret void
}
@@ -13101,7 +13101,7 @@ define void @test_compresstore_i32_v8(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %data, ptr %p, <8 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %data, ptr align 4 %p, <8 x i1> %mask)
ret void
}
@@ -13124,7 +13124,7 @@ define void @test_compresstore_i32_v16(ptr %p, <16 x i1> %mask, <16 x i32> %data
; RV32-NEXT: vse32.v v12, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %data, ptr %p, <16 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %data, ptr align 4 %p, <16 x i1> %mask)
ret void
}
@@ -13149,7 +13149,7 @@ define void @test_compresstore_i32_v32(ptr %p, <32 x i1> %mask, <32 x i32> %data
; RV32-NEXT: vse32.v v16, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v32i32(<32 x i32> %data, ptr %p, <32 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v32i32(<32 x i32> %data, ptr align 4 %p, <32 x i1> %mask)
ret void
}
@@ -15663,7 +15663,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: bltz a1, .LBB23_70
; RV32-NEXT: j .LBB23_71
entry:
- tail call void @llvm.masked.compressstore.v64i32(<64 x i32> %data, ptr %p, <64 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v64i32(<64 x i32> %data, ptr align 4 %p, <64 x i1> %mask)
ret void
}
@@ -15688,7 +15688,7 @@ define void @test_compresstore_i64_v1(ptr %p, <1 x i1> %mask, <1 x i64> %data) {
; RV32-NEXT: vse64.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v1i64(<1 x i64> %data, ptr %p, <1 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v1i64(<1 x i64> %data, ptr align 8 %p, <1 x i1> %mask)
ret void
}
@@ -15711,7 +15711,7 @@ define void @test_compresstore_i64_v2(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
; RV32-NEXT: vse64.v v9, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %data, ptr %p, <2 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %data, ptr align 8 %p, <2 x i1> %mask)
ret void
}
@@ -15734,7 +15734,7 @@ define void @test_compresstore_i64_v4(ptr %p, <4 x i1> %mask, <4 x i64> %data) {
; RV32-NEXT: vse64.v v10, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %data, ptr %p, <4 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %data, ptr align 8 %p, <4 x i1> %mask)
ret void
}
@@ -15757,7 +15757,7 @@ define void @test_compresstore_i64_v8(ptr %p, <8 x i1> %mask, <8 x i64> %data) {
; RV32-NEXT: vse64.v v12, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %data, ptr %p, <8 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %data, ptr align 8 %p, <8 x i1> %mask)
ret void
}
@@ -15780,7 +15780,7 @@ define void @test_compresstore_i64_v16(ptr %p, <16 x i1> %mask, <16 x i64> %data
; RV32-NEXT: vse64.v v16, (a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v16i64(<16 x i64> %data, ptr %p, <16 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v16i64(<16 x i64> %data, ptr align 8 %p, <16 x i1> %mask)
ret void
}
@@ -17499,7 +17499,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: sb a1, 5(a0)
; RV32-NEXT: ret
entry:
- tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr %p, <32 x i1> %mask)
+ tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask)
ret void
}
@@ -17536,10 +17536,3 @@ declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
declare void @llvm.masked.compressstore.v16i64(<16 x i64>, ptr, <16 x i1>)
declare void @llvm.masked.compressstore.v32i64(<32 x i64>, ptr, <32 x i1>)
-
-;define void @test_compresstore_iYYYY_vXXXX(ptr %p, <XXXX x i1> %mask, <XXXX x iYYYY> %data) {
-;entry:
-; tail call void @llvm.masked.compressstore.vXXXXiYYYY(<XXXX x iYYYY> %data, ptr %p, <XXXX x i1> %mask)
-; ret void
-;}
-;declare void @llvm.masked.compressstore.vXXXXiYYYY(<XXXX x iYYYY>, ptr, <XXXX x i1>)
>From 4b521bba791f1f96c9d843be621a45f15e945eef Mon Sep 17 00:00:00 2001
From: Kolya Panchenko <kolya.panchenko at sifive.com>
Date: Fri, 1 Mar 2024 07:11:12 -0800
Subject: [PATCH 3/5] Properly check if scalar type is legal for RVV
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 17 ++---------------
1 file changed, 2 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index afec82c3333b32..c28f22ff6a6146 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1626,22 +1626,9 @@ bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
if (!VTy || VTy->isScalableTy() || !ST->hasVInstructions())
return false;
- Type *ScalarTy = VTy->getScalarType();
- if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
- return true;
-
- if (!ScalarTy->isIntegerTy())
+ if (!TLI->isLegalElementTypeForRVV(
+ TLI->getValueType(DL, VTy->getElementType())))
return false;
- switch (ScalarTy->getIntegerBitWidth()) {
- case 8:
- case 16:
- case 32:
- case 64:
- break;
- default:
- return false;
- }
-
return getRegUsageForType(VTy) <= 8;
}
>From 276abb762311180284de46ccec5c76440e97cf47 Mon Sep 17 00:00:00 2001
From: Kolya Panchenko <kolya.panchenko at sifive.com>
Date: Wed, 6 Mar 2024 10:25:46 -0800
Subject: [PATCH 4/5] rebase
---
llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 3574 +++--------------
.../rvv/fixed-vectors-compressstore-fp.ll | 1004 +----
.../rvv/fixed-vectors-compressstore-int.ll | 928 +----
3 files changed, 839 insertions(+), 4667 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
index 5739b98a0d7247..d1d4138e539776 100644
--- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -8830,10 +8830,8 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: .LBB16_16: # %cond.store43
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 15
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_17: # %else44
; RV64-NEXT: addi sp, sp, -2032
@@ -8900,9 +8898,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 0(a1)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_32: # %else86
; RV64-NEXT: slli a3, a2, 33
@@ -8966,9 +8962,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 0(a1)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_50: # %else137
; RV64-NEXT: slli a1, a2, 16
@@ -9026,9 +9020,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 252(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_66: # %else182
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -9043,9 +9035,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 126(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_68: # %else185
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -9102,10 +9092,8 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: .LBB16_85: # %cond.store235
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 15
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_86: # %else236
; RV64-NEXT: slli a3, a1, 47
@@ -9118,60 +9106,46 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: bltz a3, .LBB16_212
; RV64-NEXT: .LBB16_88: # %else242
; RV64-NEXT: slli a3, a1, 45
-; RV64-NEXT: bgez a3, .LBB16_89
-; RV64-NEXT: j .LBB16_213
+; RV64-NEXT: bltz a3, .LBB16_213
; RV64-NEXT: .LBB16_89: # %else245
; RV64-NEXT: slli a3, a1, 44
-; RV64-NEXT: bgez a3, .LBB16_90
-; RV64-NEXT: j .LBB16_214
+; RV64-NEXT: bltz a3, .LBB16_214
; RV64-NEXT: .LBB16_90: # %else248
; RV64-NEXT: slli a3, a1, 43
-; RV64-NEXT: bgez a3, .LBB16_91
-; RV64-NEXT: j .LBB16_215
+; RV64-NEXT: bltz a3, .LBB16_215
; RV64-NEXT: .LBB16_91: # %else251
; RV64-NEXT: slli a3, a1, 42
-; RV64-NEXT: bgez a3, .LBB16_92
-; RV64-NEXT: j .LBB16_216
+; RV64-NEXT: bltz a3, .LBB16_216
; RV64-NEXT: .LBB16_92: # %else254
; RV64-NEXT: slli a3, a1, 41
-; RV64-NEXT: bgez a3, .LBB16_93
-; RV64-NEXT: j .LBB16_217
+; RV64-NEXT: bltz a3, .LBB16_217
; RV64-NEXT: .LBB16_93: # %else257
; RV64-NEXT: slli a3, a1, 40
-; RV64-NEXT: bgez a3, .LBB16_94
-; RV64-NEXT: j .LBB16_218
+; RV64-NEXT: bltz a3, .LBB16_218
; RV64-NEXT: .LBB16_94: # %else260
; RV64-NEXT: slli a3, a1, 39
-; RV64-NEXT: bgez a3, .LBB16_95
-; RV64-NEXT: j .LBB16_219
+; RV64-NEXT: bltz a3, .LBB16_219
; RV64-NEXT: .LBB16_95: # %else263
; RV64-NEXT: slli a3, a1, 38
-; RV64-NEXT: bgez a3, .LBB16_96
-; RV64-NEXT: j .LBB16_220
+; RV64-NEXT: bltz a3, .LBB16_220
; RV64-NEXT: .LBB16_96: # %else266
; RV64-NEXT: slli a3, a1, 37
-; RV64-NEXT: bgez a3, .LBB16_97
-; RV64-NEXT: j .LBB16_221
+; RV64-NEXT: bltz a3, .LBB16_221
; RV64-NEXT: .LBB16_97: # %else269
; RV64-NEXT: slli a3, a1, 36
-; RV64-NEXT: bgez a3, .LBB16_98
-; RV64-NEXT: j .LBB16_222
+; RV64-NEXT: bltz a3, .LBB16_222
; RV64-NEXT: .LBB16_98: # %else272
; RV64-NEXT: slli a3, a1, 35
-; RV64-NEXT: bgez a3, .LBB16_99
-; RV64-NEXT: j .LBB16_223
+; RV64-NEXT: bltz a3, .LBB16_223
; RV64-NEXT: .LBB16_99: # %else275
; RV64-NEXT: slli a3, a1, 34
-; RV64-NEXT: bgez a3, .LBB16_100
-; RV64-NEXT: j .LBB16_224
+; RV64-NEXT: bltz a3, .LBB16_224
; RV64-NEXT: .LBB16_100: # %else278
; RV64-NEXT: slli a3, a1, 33
-; RV64-NEXT: bgez a3, .LBB16_101
-; RV64-NEXT: j .LBB16_225
+; RV64-NEXT: bltz a3, .LBB16_225
; RV64-NEXT: .LBB16_101: # %else281
; RV64-NEXT: slli a3, a1, 32
-; RV64-NEXT: bgez a3, .LBB16_102
-; RV64-NEXT: j .LBB16_226
+; RV64-NEXT: bltz a3, .LBB16_226
; RV64-NEXT: .LBB16_102: # %else284
; RV64-NEXT: slli a3, a1, 31
; RV64-NEXT: bgez a3, .LBB16_104
@@ -9182,47 +9156,36 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a2, 0(a2)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: .LBB16_104: # %else287
; RV64-NEXT: slli a3, a1, 30
; RV64-NEXT: addi a2, sp, 2018
-; RV64-NEXT: bgez a3, .LBB16_105
-; RV64-NEXT: j .LBB16_227
-; RV64-NEXT: .LBB16_105: # %else290
+; RV64-NEXT: bltz a3, .LBB16_227
+; RV64-NEXT: # %bb.105: # %else290
; RV64-NEXT: slli a3, a1, 29
-; RV64-NEXT: bgez a3, .LBB16_106
-; RV64-NEXT: j .LBB16_228
+; RV64-NEXT: bltz a3, .LBB16_228
; RV64-NEXT: .LBB16_106: # %else293
; RV64-NEXT: slli a3, a1, 28
-; RV64-NEXT: bgez a3, .LBB16_107
-; RV64-NEXT: j .LBB16_229
+; RV64-NEXT: bltz a3, .LBB16_229
; RV64-NEXT: .LBB16_107: # %else296
; RV64-NEXT: slli a3, a1, 27
-; RV64-NEXT: bgez a3, .LBB16_108
-; RV64-NEXT: j .LBB16_230
+; RV64-NEXT: bltz a3, .LBB16_230
; RV64-NEXT: .LBB16_108: # %else299
; RV64-NEXT: slli a3, a1, 26
-; RV64-NEXT: bgez a3, .LBB16_109
-; RV64-NEXT: j .LBB16_231
+; RV64-NEXT: bltz a3, .LBB16_231
; RV64-NEXT: .LBB16_109: # %else302
; RV64-NEXT: slli a3, a1, 25
-; RV64-NEXT: bgez a3, .LBB16_110
-; RV64-NEXT: j .LBB16_232
+; RV64-NEXT: bltz a3, .LBB16_232
; RV64-NEXT: .LBB16_110: # %else305
; RV64-NEXT: slli a3, a1, 24
-; RV64-NEXT: bgez a3, .LBB16_111
-; RV64-NEXT: j .LBB16_233
+; RV64-NEXT: bltz a3, .LBB16_233
; RV64-NEXT: .LBB16_111: # %else308
; RV64-NEXT: slli a3, a1, 23
-; RV64-NEXT: bgez a3, .LBB16_112
-; RV64-NEXT: j .LBB16_234
+; RV64-NEXT: bltz a3, .LBB16_234
; RV64-NEXT: .LBB16_112: # %else311
; RV64-NEXT: slli a3, a1, 22
-; RV64-NEXT: bgez a3, .LBB16_113
-; RV64-NEXT: j .LBB16_235
+; RV64-NEXT: bltz a3, .LBB16_235
; RV64-NEXT: .LBB16_113: # %else314
; RV64-NEXT: slli a3, a1, 21
; RV64-NEXT: bgez a3, .LBB16_114
@@ -9315,9 +9278,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a2)
; RV64-NEXT: lh a1, 254(sp)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: .LBB16_136: # %else380
; RV64-NEXT: lui a0, 3
; RV64-NEXT: addiw a0, a0, 256
@@ -9330,151 +9291,113 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
; RV64-NEXT: .LBB16_137: # %cond.store
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 2
; RV64-NEXT: beqz a1, .LBB16_2
; RV64-NEXT: .LBB16_138: # %cond.store1
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 1
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 4
; RV64-NEXT: beqz a1, .LBB16_3
; RV64-NEXT: .LBB16_139: # %cond.store4
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 2
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 8
; RV64-NEXT: beqz a1, .LBB16_4
; RV64-NEXT: .LBB16_140: # %cond.store7
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 3
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 16
; RV64-NEXT: beqz a1, .LBB16_5
; RV64-NEXT: .LBB16_141: # %cond.store10
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 4
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 32
; RV64-NEXT: beqz a1, .LBB16_6
; RV64-NEXT: .LBB16_142: # %cond.store13
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 5
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 64
; RV64-NEXT: beqz a1, .LBB16_7
; RV64-NEXT: .LBB16_143: # %cond.store16
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 6
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 128
; RV64-NEXT: beqz a1, .LBB16_8
; RV64-NEXT: .LBB16_144: # %cond.store19
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 7
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 256
; RV64-NEXT: beqz a1, .LBB16_9
; RV64-NEXT: .LBB16_145: # %cond.store22
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 8
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 512
; RV64-NEXT: beqz a1, .LBB16_10
; RV64-NEXT: .LBB16_146: # %cond.store25
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 9
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a1, a2, 1024
; RV64-NEXT: beqz a1, .LBB16_11
; RV64-NEXT: .LBB16_147: # %cond.store28
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 10
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 52
; RV64-NEXT: bgez a1, .LBB16_12
; RV64-NEXT: .LBB16_148: # %cond.store31
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 11
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 51
; RV64-NEXT: bgez a1, .LBB16_13
; RV64-NEXT: .LBB16_149: # %cond.store34
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 12
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 50
; RV64-NEXT: bgez a1, .LBB16_14
; RV64-NEXT: .LBB16_150: # %cond.store37
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 13
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 49
; RV64-NEXT: bgez a1, .LBB16_15
; RV64-NEXT: .LBB16_151: # %cond.store40
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 14
-; RV64-NEXT: vmv.x.s a1, v24
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 48
; RV64-NEXT: bltz a1, .LBB16_16
@@ -9486,9 +9409,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1638(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 46
; RV64-NEXT: bgez a3, .LBB16_19
@@ -9500,9 +9421,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1512(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 45
; RV64-NEXT: bgez a3, .LBB16_20
@@ -9514,9 +9433,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1386(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 44
; RV64-NEXT: bgez a3, .LBB16_21
@@ -9528,9 +9445,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1260(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 43
; RV64-NEXT: bgez a3, .LBB16_22
@@ -9542,9 +9457,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1134(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 42
; RV64-NEXT: bgez a3, .LBB16_23
@@ -9556,9 +9469,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1008(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 41
; RV64-NEXT: bgez a3, .LBB16_24
@@ -9570,9 +9481,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 882(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 40
; RV64-NEXT: bgez a3, .LBB16_25
@@ -9584,9 +9493,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 756(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 39
; RV64-NEXT: bgez a3, .LBB16_26
@@ -9598,9 +9505,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 630(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 38
; RV64-NEXT: bgez a3, .LBB16_27
@@ -9612,9 +9517,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 504(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 37
; RV64-NEXT: bgez a3, .LBB16_28
@@ -9626,9 +9529,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 378(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 36
; RV64-NEXT: bgez a3, .LBB16_29
@@ -9640,9 +9541,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 252(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 35
; RV64-NEXT: bgez a3, .LBB16_30
@@ -9654,9 +9553,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 126(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 34
; RV64-NEXT: bltz a3, .LBB16_31
@@ -9669,9 +9566,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 2016(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: bgez a3, .LBB16_34
@@ -9683,9 +9578,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1890(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 31
; RV64-NEXT: bgez a3, .LBB16_35
@@ -9697,9 +9590,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1764(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 30
; RV64-NEXT: bgez a3, .LBB16_36
@@ -9711,9 +9602,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1638(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 29
; RV64-NEXT: bgez a3, .LBB16_37
@@ -9725,9 +9614,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1512(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 28
; RV64-NEXT: bgez a3, .LBB16_38
@@ -9739,9 +9626,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1386(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 27
; RV64-NEXT: bgez a3, .LBB16_39
@@ -9753,9 +9638,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1260(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 26
; RV64-NEXT: bgez a3, .LBB16_40
@@ -9767,9 +9650,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1134(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 25
; RV64-NEXT: bgez a3, .LBB16_41
@@ -9781,9 +9662,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 1008(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 24
; RV64-NEXT: bgez a3, .LBB16_42
@@ -9795,9 +9674,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 882(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 23
; RV64-NEXT: bgez a3, .LBB16_43
@@ -9809,9 +9686,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 756(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 22
; RV64-NEXT: bgez a3, .LBB16_44
@@ -9823,9 +9698,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 630(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 21
; RV64-NEXT: bgez a3, .LBB16_45
@@ -9837,9 +9710,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 504(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 20
; RV64-NEXT: bgez a3, .LBB16_46
@@ -9851,9 +9722,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 378(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 19
; RV64-NEXT: bgez a3, .LBB16_47
@@ -9865,9 +9734,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 252(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 18
; RV64-NEXT: bgez a3, .LBB16_48
@@ -9879,9 +9746,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a3, 126(a1)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a2, 17
; RV64-NEXT: bltz a3, .LBB16_49
@@ -9894,9 +9759,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 2016(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 15
; RV64-NEXT: bgez a1, .LBB16_52
@@ -9907,9 +9770,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1890(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 14
; RV64-NEXT: bgez a1, .LBB16_53
@@ -9921,9 +9782,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1764(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 13
; RV64-NEXT: bgez a1, .LBB16_54
@@ -9935,9 +9794,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1638(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 12
; RV64-NEXT: bgez a1, .LBB16_55
@@ -9949,9 +9806,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1512(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 11
; RV64-NEXT: bgez a1, .LBB16_56
@@ -9963,9 +9818,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1386(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 10
; RV64-NEXT: bgez a1, .LBB16_57
@@ -9977,9 +9830,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1260(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 9
; RV64-NEXT: bgez a1, .LBB16_58
@@ -9991,9 +9842,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1134(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 8
; RV64-NEXT: bgez a1, .LBB16_59
@@ -10005,9 +9854,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 1008(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 7
; RV64-NEXT: bgez a1, .LBB16_60
@@ -10019,9 +9866,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 882(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 6
; RV64-NEXT: bgez a1, .LBB16_61
@@ -10033,9 +9878,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 756(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 5
; RV64-NEXT: bgez a1, .LBB16_62
@@ -10047,9 +9890,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 630(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 4
; RV64-NEXT: bgez a1, .LBB16_63
@@ -10061,9 +9902,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 504(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 3
; RV64-NEXT: bgez a1, .LBB16_64
@@ -10075,9 +9914,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a1, 378(a3)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a1, a2, 2
; RV64-NEXT: bltz a1, .LBB16_65
@@ -10090,158 +9927,118 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v8, (a4)
; RV64-NEXT: lh a2, 0(a3)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 1
; RV64-NEXT: beqz a2, .LBB16_70
; RV64-NEXT: .LBB16_196: # %cond.store190
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.x.s a2, v16
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v16, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 2
; RV64-NEXT: beqz a2, .LBB16_71
; RV64-NEXT: .LBB16_197: # %cond.store193
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 1
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 4
; RV64-NEXT: beqz a2, .LBB16_72
; RV64-NEXT: .LBB16_198: # %cond.store196
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 2
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 8
; RV64-NEXT: beqz a2, .LBB16_73
; RV64-NEXT: .LBB16_199: # %cond.store199
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 3
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 16
; RV64-NEXT: beqz a2, .LBB16_74
; RV64-NEXT: .LBB16_200: # %cond.store202
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 4
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 32
; RV64-NEXT: beqz a2, .LBB16_75
; RV64-NEXT: .LBB16_201: # %cond.store205
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 5
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 64
; RV64-NEXT: beqz a2, .LBB16_76
; RV64-NEXT: .LBB16_202: # %cond.store208
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 6
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 128
; RV64-NEXT: beqz a2, .LBB16_77
; RV64-NEXT: .LBB16_203: # %cond.store211
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 7
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 256
; RV64-NEXT: beqz a2, .LBB16_78
; RV64-NEXT: .LBB16_204: # %cond.store214
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 8
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 512
; RV64-NEXT: beqz a2, .LBB16_79
; RV64-NEXT: .LBB16_205: # %cond.store217
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 9
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: andi a2, a1, 1024
; RV64-NEXT: beqz a2, .LBB16_80
; RV64-NEXT: .LBB16_206: # %cond.store220
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 10
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 52
; RV64-NEXT: bgez a2, .LBB16_81
; RV64-NEXT: .LBB16_207: # %cond.store223
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 11
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 51
; RV64-NEXT: bgez a2, .LBB16_82
; RV64-NEXT: .LBB16_208: # %cond.store226
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 12
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 50
; RV64-NEXT: bgez a2, .LBB16_83
; RV64-NEXT: .LBB16_209: # %cond.store229
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 13
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 49
; RV64-NEXT: bgez a2, .LBB16_84
; RV64-NEXT: .LBB16_210: # %cond.store232
; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 14
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 48
; RV64-NEXT: bltz a2, .LBB16_85
@@ -10254,9 +10051,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 2016(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 46
; RV64-NEXT: bgez a3, .LBB16_88
@@ -10268,13 +10063,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1890(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 45
-; RV64-NEXT: bltz a3, .LBB16_213
-; RV64-NEXT: j .LBB16_89
+; RV64-NEXT: bgez a3, .LBB16_89
; RV64-NEXT: .LBB16_213: # %cond.store244
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 23
@@ -10283,13 +10075,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1764(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 44
-; RV64-NEXT: bltz a3, .LBB16_214
-; RV64-NEXT: j .LBB16_90
+; RV64-NEXT: bgez a3, .LBB16_90
; RV64-NEXT: .LBB16_214: # %cond.store247
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10298,13 +10087,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1638(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 43
-; RV64-NEXT: bltz a3, .LBB16_215
-; RV64-NEXT: j .LBB16_91
+; RV64-NEXT: bgez a3, .LBB16_91
; RV64-NEXT: .LBB16_215: # %cond.store250
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 11
@@ -10313,13 +10099,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1512(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 42
-; RV64-NEXT: bltz a3, .LBB16_216
-; RV64-NEXT: j .LBB16_92
+; RV64-NEXT: bgez a3, .LBB16_92
; RV64-NEXT: .LBB16_216: # %cond.store253
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10328,13 +10111,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1386(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 41
-; RV64-NEXT: bltz a3, .LBB16_217
-; RV64-NEXT: j .LBB16_93
+; RV64-NEXT: bgez a3, .LBB16_93
; RV64-NEXT: .LBB16_217: # %cond.store256
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 21
@@ -10343,13 +10123,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1260(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 40
-; RV64-NEXT: bltz a3, .LBB16_218
-; RV64-NEXT: j .LBB16_94
+; RV64-NEXT: bgez a3, .LBB16_94
; RV64-NEXT: .LBB16_218: # %cond.store259
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10358,13 +10135,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1134(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 39
-; RV64-NEXT: bltz a3, .LBB16_219
-; RV64-NEXT: j .LBB16_95
+; RV64-NEXT: bgez a3, .LBB16_95
; RV64-NEXT: .LBB16_219: # %cond.store262
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 5
@@ -10373,13 +10147,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1008(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 38
-; RV64-NEXT: bltz a3, .LBB16_220
-; RV64-NEXT: j .LBB16_96
+; RV64-NEXT: bgez a3, .LBB16_96
; RV64-NEXT: .LBB16_220: # %cond.store265
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10388,13 +10159,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 882(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 37
-; RV64-NEXT: bltz a3, .LBB16_221
-; RV64-NEXT: j .LBB16_97
+; RV64-NEXT: bgez a3, .LBB16_97
; RV64-NEXT: .LBB16_221: # %cond.store268
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 19
@@ -10403,13 +10171,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 756(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 36
-; RV64-NEXT: bltz a3, .LBB16_222
-; RV64-NEXT: j .LBB16_98
+; RV64-NEXT: bgez a3, .LBB16_98
; RV64-NEXT: .LBB16_222: # %cond.store271
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10418,13 +10183,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 630(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 35
-; RV64-NEXT: bltz a3, .LBB16_223
-; RV64-NEXT: j .LBB16_99
+; RV64-NEXT: bgez a3, .LBB16_99
; RV64-NEXT: .LBB16_223: # %cond.store274
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 9
@@ -10433,13 +10195,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 504(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 34
-; RV64-NEXT: bltz a3, .LBB16_224
-; RV64-NEXT: j .LBB16_100
+; RV64-NEXT: bgez a3, .LBB16_100
; RV64-NEXT: .LBB16_224: # %cond.store277
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10448,13 +10207,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 378(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 33
-; RV64-NEXT: bltz a3, .LBB16_225
-; RV64-NEXT: j .LBB16_101
+; RV64-NEXT: bgez a3, .LBB16_101
; RV64-NEXT: .LBB16_225: # %cond.store280
; RV64-NEXT: li a3, 64
; RV64-NEXT: li a4, 17
@@ -10463,13 +10219,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 252(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 32
-; RV64-NEXT: bltz a3, .LBB16_226
-; RV64-NEXT: j .LBB16_102
+; RV64-NEXT: bgez a3, .LBB16_102
; RV64-NEXT: .LBB16_226: # %cond.store283
; RV64-NEXT: li a3, 64
; RV64-NEXT: lui a4, 1
@@ -10478,14 +10231,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 126(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 31
-; RV64-NEXT: bgez a3, .LBB16_257
-; RV64-NEXT: j .LBB16_103
-; RV64-NEXT: .LBB16_257: # %cond.store283
+; RV64-NEXT: bltz a3, .LBB16_103
; RV64-NEXT: j .LBB16_104
; RV64-NEXT: .LBB16_227: # %cond.store289
; RV64-NEXT: li a3, 64
@@ -10494,13 +10243,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 2016(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 29
-; RV64-NEXT: bltz a3, .LBB16_228
-; RV64-NEXT: j .LBB16_106
+; RV64-NEXT: bgez a3, .LBB16_106
; RV64-NEXT: .LBB16_228: # %cond.store292
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10508,13 +10254,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1890(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 28
-; RV64-NEXT: bltz a3, .LBB16_229
-; RV64-NEXT: j .LBB16_107
+; RV64-NEXT: bgez a3, .LBB16_107
; RV64-NEXT: .LBB16_229: # %cond.store295
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10522,13 +10265,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1764(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 27
-; RV64-NEXT: bltz a3, .LBB16_230
-; RV64-NEXT: j .LBB16_108
+; RV64-NEXT: bgez a3, .LBB16_108
; RV64-NEXT: .LBB16_230: # %cond.store298
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10536,13 +10276,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1638(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 26
-; RV64-NEXT: bltz a3, .LBB16_231
-; RV64-NEXT: j .LBB16_109
+; RV64-NEXT: bgez a3, .LBB16_109
; RV64-NEXT: .LBB16_231: # %cond.store301
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10550,13 +10287,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1512(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 25
-; RV64-NEXT: bltz a3, .LBB16_232
-; RV64-NEXT: j .LBB16_110
+; RV64-NEXT: bgez a3, .LBB16_110
; RV64-NEXT: .LBB16_232: # %cond.store304
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10564,13 +10298,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1386(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 24
-; RV64-NEXT: bltz a3, .LBB16_233
-; RV64-NEXT: j .LBB16_111
+; RV64-NEXT: bgez a3, .LBB16_111
; RV64-NEXT: .LBB16_233: # %cond.store307
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10578,13 +10309,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1260(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 23
-; RV64-NEXT: bltz a3, .LBB16_234
-; RV64-NEXT: j .LBB16_112
+; RV64-NEXT: bgez a3, .LBB16_112
; RV64-NEXT: .LBB16_234: # %cond.store310
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10592,13 +10320,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1134(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 22
-; RV64-NEXT: bltz a3, .LBB16_235
-; RV64-NEXT: j .LBB16_113
+; RV64-NEXT: bgez a3, .LBB16_113
; RV64-NEXT: .LBB16_235: # %cond.store313
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10606,13 +10331,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 1008(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 21
-; RV64-NEXT: bltz a3, .LBB16_236
-; RV64-NEXT: j .LBB16_114
+; RV64-NEXT: bgez a3, .LBB16_114
; RV64-NEXT: .LBB16_236: # %cond.store316
; RV64-NEXT: li a3, 64
; RV64-NEXT: addi a4, sp, 2047
@@ -10620,9 +10342,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 882(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 20
; RV64-NEXT: bltz a3, .LBB16_237
@@ -10634,9 +10354,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 756(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 19
; RV64-NEXT: bltz a3, .LBB16_238
@@ -10648,9 +10366,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 630(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 18
; RV64-NEXT: bltz a3, .LBB16_239
@@ -10662,9 +10378,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 504(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 17
; RV64-NEXT: bltz a3, .LBB16_240
@@ -10676,9 +10390,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 378(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 16
; RV64-NEXT: bltz a3, .LBB16_241
@@ -10690,9 +10402,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 252(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 15
; RV64-NEXT: bltz a3, .LBB16_242
@@ -10704,9 +10414,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a3, 126(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sh a3, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a3, a1, 14
; RV64-NEXT: bltz a3, .LBB16_243
@@ -10717,9 +10425,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a4)
; RV64-NEXT: lh a2, 0(a2)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 13
; RV64-NEXT: bltz a2, .LBB16_244
@@ -10730,9 +10436,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1892(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 12
; RV64-NEXT: bltz a2, .LBB16_245
@@ -10743,9 +10447,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1766(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 11
; RV64-NEXT: bltz a2, .LBB16_246
@@ -10756,9 +10458,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1640(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 10
; RV64-NEXT: bltz a2, .LBB16_247
@@ -10769,9 +10469,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1514(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 9
; RV64-NEXT: bltz a2, .LBB16_248
@@ -10782,9 +10480,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1388(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 8
; RV64-NEXT: bltz a2, .LBB16_249
@@ -10795,9 +10491,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1262(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 7
; RV64-NEXT: bltz a2, .LBB16_250
@@ -10808,9 +10502,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1136(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 6
; RV64-NEXT: bltz a2, .LBB16_251
@@ -10821,9 +10513,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 1010(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 5
; RV64-NEXT: bltz a2, .LBB16_252
@@ -10834,9 +10524,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 884(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 4
; RV64-NEXT: bltz a2, .LBB16_253
@@ -10847,9 +10535,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 758(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 3
; RV64-NEXT: bltz a2, .LBB16_254
@@ -10860,9 +10546,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 632(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 2
; RV64-NEXT: bltz a2, .LBB16_255
@@ -10873,9 +10557,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 506(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: slli a2, a1, 1
; RV64-NEXT: bltz a2, .LBB16_256
@@ -10886,13 +10568,11 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v16, (a3)
; RV64-NEXT: lh a2, 380(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sh a2, 0(a0)
; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: bgez a1, .LBB16_258
+; RV64-NEXT: bgez a1, .LBB16_257
; RV64-NEXT: j .LBB16_135
-; RV64-NEXT: .LBB16_258: # %cond.store376
+; RV64-NEXT: .LBB16_257: # %cond.store376
; RV64-NEXT: j .LBB16_136
;
; RV32-LABEL: test_compresstore_i16_v128:
@@ -10949,10 +10629,8 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: .LBB16_16: # %cond.store43
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 15
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_17: # %else44
; RV32-NEXT: addi sp, sp, -2032
@@ -11016,9 +10694,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 126(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_31: # %else83
; RV32-NEXT: slli a4, a3, 2
@@ -11032,9 +10708,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a2, 0(a2)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_33: # %else86
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -11052,9 +10726,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a2, 2016(a4)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_35: # %else89
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -11113,9 +10785,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 0(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_52: # %else137
; RV32-NEXT: slli a3, a2, 16
@@ -11173,9 +10843,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 252(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_68: # %else182
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -11190,9 +10858,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 126(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_70: # %else185
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -11249,10 +10915,8 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: .LBB16_87: # %cond.store235
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 15
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_88: # %else236
; RV32-NEXT: slli a4, a3, 15
@@ -11262,52 +10926,40 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: bltz a4, .LBB16_214
; RV32-NEXT: # %bb.89: # %else239
; RV32-NEXT: slli a4, a3, 14
-; RV32-NEXT: bgez a4, .LBB16_90
-; RV32-NEXT: j .LBB16_215
+; RV32-NEXT: bltz a4, .LBB16_215
; RV32-NEXT: .LBB16_90: # %else242
; RV32-NEXT: slli a4, a3, 13
-; RV32-NEXT: bgez a4, .LBB16_91
-; RV32-NEXT: j .LBB16_216
+; RV32-NEXT: bltz a4, .LBB16_216
; RV32-NEXT: .LBB16_91: # %else245
; RV32-NEXT: slli a4, a3, 12
-; RV32-NEXT: bgez a4, .LBB16_92
-; RV32-NEXT: j .LBB16_217
+; RV32-NEXT: bltz a4, .LBB16_217
; RV32-NEXT: .LBB16_92: # %else248
; RV32-NEXT: slli a4, a3, 11
-; RV32-NEXT: bgez a4, .LBB16_93
-; RV32-NEXT: j .LBB16_218
+; RV32-NEXT: bltz a4, .LBB16_218
; RV32-NEXT: .LBB16_93: # %else251
; RV32-NEXT: slli a4, a3, 10
-; RV32-NEXT: bgez a4, .LBB16_94
-; RV32-NEXT: j .LBB16_219
+; RV32-NEXT: bltz a4, .LBB16_219
; RV32-NEXT: .LBB16_94: # %else254
; RV32-NEXT: slli a4, a3, 9
-; RV32-NEXT: bgez a4, .LBB16_95
-; RV32-NEXT: j .LBB16_220
+; RV32-NEXT: bltz a4, .LBB16_220
; RV32-NEXT: .LBB16_95: # %else257
; RV32-NEXT: slli a4, a3, 8
-; RV32-NEXT: bgez a4, .LBB16_96
-; RV32-NEXT: j .LBB16_221
+; RV32-NEXT: bltz a4, .LBB16_221
; RV32-NEXT: .LBB16_96: # %else260
; RV32-NEXT: slli a4, a3, 7
-; RV32-NEXT: bgez a4, .LBB16_97
-; RV32-NEXT: j .LBB16_222
+; RV32-NEXT: bltz a4, .LBB16_222
; RV32-NEXT: .LBB16_97: # %else263
; RV32-NEXT: slli a4, a3, 6
-; RV32-NEXT: bgez a4, .LBB16_98
-; RV32-NEXT: j .LBB16_223
+; RV32-NEXT: bltz a4, .LBB16_223
; RV32-NEXT: .LBB16_98: # %else266
; RV32-NEXT: slli a4, a3, 5
-; RV32-NEXT: bgez a4, .LBB16_99
-; RV32-NEXT: j .LBB16_224
+; RV32-NEXT: bltz a4, .LBB16_224
; RV32-NEXT: .LBB16_99: # %else269
; RV32-NEXT: slli a4, a3, 4
-; RV32-NEXT: bgez a4, .LBB16_100
-; RV32-NEXT: j .LBB16_225
+; RV32-NEXT: bltz a4, .LBB16_225
; RV32-NEXT: .LBB16_100: # %else272
; RV32-NEXT: slli a4, a3, 3
-; RV32-NEXT: bgez a4, .LBB16_101
-; RV32-NEXT: j .LBB16_226
+; RV32-NEXT: bltz a4, .LBB16_226
; RV32-NEXT: .LBB16_101: # %else275
; RV32-NEXT: slli a4, a3, 2
; RV32-NEXT: bgez a4, .LBB16_103
@@ -11319,9 +10971,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 378(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_103: # %else278
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -11336,9 +10986,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a1, 252(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_105: # %else281
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -11352,9 +11000,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 126(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_107: # %else284
; RV32-NEXT: andi a3, a1, 1
@@ -11366,79 +11012,60 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a2, 0(a2)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: .LBB16_109: # %else287
; RV32-NEXT: andi a3, a1, 2
; RV32-NEXT: addi a2, sp, 2018
-; RV32-NEXT: beqz a3, .LBB16_110
-; RV32-NEXT: j .LBB16_227
-; RV32-NEXT: .LBB16_110: # %else290
+; RV32-NEXT: bnez a3, .LBB16_227
+; RV32-NEXT: # %bb.110: # %else290
; RV32-NEXT: andi a3, a1, 4
-; RV32-NEXT: beqz a3, .LBB16_111
-; RV32-NEXT: j .LBB16_228
+; RV32-NEXT: bnez a3, .LBB16_228
; RV32-NEXT: .LBB16_111: # %else293
; RV32-NEXT: andi a3, a1, 8
-; RV32-NEXT: beqz a3, .LBB16_112
-; RV32-NEXT: j .LBB16_229
+; RV32-NEXT: bnez a3, .LBB16_229
; RV32-NEXT: .LBB16_112: # %else296
; RV32-NEXT: andi a3, a1, 16
-; RV32-NEXT: beqz a3, .LBB16_113
-; RV32-NEXT: j .LBB16_230
+; RV32-NEXT: bnez a3, .LBB16_230
; RV32-NEXT: .LBB16_113: # %else299
; RV32-NEXT: andi a3, a1, 32
-; RV32-NEXT: beqz a3, .LBB16_114
-; RV32-NEXT: j .LBB16_231
+; RV32-NEXT: bnez a3, .LBB16_231
; RV32-NEXT: .LBB16_114: # %else302
; RV32-NEXT: andi a3, a1, 64
-; RV32-NEXT: beqz a3, .LBB16_115
-; RV32-NEXT: j .LBB16_232
+; RV32-NEXT: bnez a3, .LBB16_232
; RV32-NEXT: .LBB16_115: # %else305
; RV32-NEXT: andi a3, a1, 128
-; RV32-NEXT: beqz a3, .LBB16_116
-; RV32-NEXT: j .LBB16_233
+; RV32-NEXT: bnez a3, .LBB16_233
; RV32-NEXT: .LBB16_116: # %else308
; RV32-NEXT: andi a3, a1, 256
-; RV32-NEXT: beqz a3, .LBB16_117
-; RV32-NEXT: j .LBB16_234
+; RV32-NEXT: bnez a3, .LBB16_234
; RV32-NEXT: .LBB16_117: # %else311
; RV32-NEXT: andi a3, a1, 512
-; RV32-NEXT: beqz a3, .LBB16_118
-; RV32-NEXT: j .LBB16_235
+; RV32-NEXT: bnez a3, .LBB16_235
; RV32-NEXT: .LBB16_118: # %else314
; RV32-NEXT: andi a3, a1, 1024
-; RV32-NEXT: beqz a3, .LBB16_119
-; RV32-NEXT: j .LBB16_236
+; RV32-NEXT: bnez a3, .LBB16_236
; RV32-NEXT: .LBB16_119: # %else317
; RV32-NEXT: slli a3, a1, 20
-; RV32-NEXT: bgez a3, .LBB16_120
-; RV32-NEXT: j .LBB16_237
+; RV32-NEXT: bltz a3, .LBB16_237
; RV32-NEXT: .LBB16_120: # %else320
; RV32-NEXT: slli a3, a1, 19
-; RV32-NEXT: bgez a3, .LBB16_121
-; RV32-NEXT: j .LBB16_238
+; RV32-NEXT: bltz a3, .LBB16_238
; RV32-NEXT: .LBB16_121: # %else323
; RV32-NEXT: slli a3, a1, 18
-; RV32-NEXT: bgez a3, .LBB16_122
-; RV32-NEXT: j .LBB16_239
+; RV32-NEXT: bltz a3, .LBB16_239
; RV32-NEXT: .LBB16_122: # %else326
; RV32-NEXT: slli a3, a1, 17
-; RV32-NEXT: bgez a3, .LBB16_123
-; RV32-NEXT: j .LBB16_240
+; RV32-NEXT: bltz a3, .LBB16_240
; RV32-NEXT: .LBB16_123: # %else329
; RV32-NEXT: slli a3, a1, 16
-; RV32-NEXT: bgez a3, .LBB16_124
-; RV32-NEXT: j .LBB16_241
+; RV32-NEXT: bltz a3, .LBB16_241
; RV32-NEXT: .LBB16_124: # %else332
; RV32-NEXT: slli a3, a1, 15
-; RV32-NEXT: bgez a3, .LBB16_125
-; RV32-NEXT: j .LBB16_242
+; RV32-NEXT: bltz a3, .LBB16_242
; RV32-NEXT: .LBB16_125: # %else335
; RV32-NEXT: slli a3, a1, 14
-; RV32-NEXT: bgez a3, .LBB16_126
-; RV32-NEXT: j .LBB16_243
+; RV32-NEXT: bltz a3, .LBB16_243
; RV32-NEXT: .LBB16_126: # %else338
; RV32-NEXT: slli a2, a1, 13
; RV32-NEXT: bgez a2, .LBB16_127
@@ -11499,9 +11126,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a2)
; RV32-NEXT: lh a1, 254(sp)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: .LBB16_141: # %else380
; RV32-NEXT: lui a0, 3
; RV32-NEXT: addi a0, a0, 256
@@ -11514,151 +11139,113 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
; RV32-NEXT: .LBB16_142: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 2
; RV32-NEXT: beqz a1, .LBB16_2
; RV32-NEXT: .LBB16_143: # %cond.store1
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 1
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 4
; RV32-NEXT: beqz a1, .LBB16_3
; RV32-NEXT: .LBB16_144: # %cond.store4
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 2
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 8
; RV32-NEXT: beqz a1, .LBB16_4
; RV32-NEXT: .LBB16_145: # %cond.store7
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 3
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 16
; RV32-NEXT: beqz a1, .LBB16_5
; RV32-NEXT: .LBB16_146: # %cond.store10
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 4
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 32
; RV32-NEXT: beqz a1, .LBB16_6
; RV32-NEXT: .LBB16_147: # %cond.store13
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 5
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 64
; RV32-NEXT: beqz a1, .LBB16_7
; RV32-NEXT: .LBB16_148: # %cond.store16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 6
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 128
; RV32-NEXT: beqz a1, .LBB16_8
; RV32-NEXT: .LBB16_149: # %cond.store19
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 7
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 256
; RV32-NEXT: beqz a1, .LBB16_9
; RV32-NEXT: .LBB16_150: # %cond.store22
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 8
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 512
; RV32-NEXT: beqz a1, .LBB16_10
; RV32-NEXT: .LBB16_151: # %cond.store25
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 9
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a1, a3, 1024
; RV32-NEXT: beqz a1, .LBB16_11
; RV32-NEXT: .LBB16_152: # %cond.store28
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 10
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 20
; RV32-NEXT: bgez a1, .LBB16_12
; RV32-NEXT: .LBB16_153: # %cond.store31
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 11
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 19
; RV32-NEXT: bgez a1, .LBB16_13
; RV32-NEXT: .LBB16_154: # %cond.store34
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 12
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 18
; RV32-NEXT: bgez a1, .LBB16_14
; RV32-NEXT: .LBB16_155: # %cond.store37
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 13
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 17
; RV32-NEXT: bgez a1, .LBB16_15
; RV32-NEXT: .LBB16_156: # %cond.store40
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 14
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v24, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 16
; RV32-NEXT: bltz a1, .LBB16_16
@@ -11670,9 +11257,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 1638(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 14
; RV32-NEXT: bgez a1, .LBB16_19
@@ -11684,9 +11269,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 1512(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 13
; RV32-NEXT: bgez a1, .LBB16_20
@@ -11698,9 +11281,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 1386(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 12
; RV32-NEXT: bgez a1, .LBB16_21
@@ -11712,9 +11293,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 1260(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 11
; RV32-NEXT: bgez a1, .LBB16_22
@@ -11726,9 +11305,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 1134(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 10
; RV32-NEXT: bgez a1, .LBB16_23
@@ -11740,9 +11317,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 1008(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 9
; RV32-NEXT: bgez a1, .LBB16_24
@@ -11754,9 +11329,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 882(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 8
; RV32-NEXT: bgez a1, .LBB16_25
@@ -11768,9 +11341,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 756(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 7
; RV32-NEXT: bgez a1, .LBB16_26
@@ -11782,9 +11353,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 630(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 6
; RV32-NEXT: bgez a1, .LBB16_27
@@ -11796,9 +11365,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 504(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 5
; RV32-NEXT: bgez a1, .LBB16_28
@@ -11810,9 +11377,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 378(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 4
; RV32-NEXT: bgez a1, .LBB16_29
@@ -11824,9 +11389,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a4)
; RV32-NEXT: lh a1, 252(a2)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sh a1, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a1, a3, 3
; RV32-NEXT: bltz a1, .LBB16_30
@@ -11839,9 +11402,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1890(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 1
; RV32-NEXT: beqz a3, .LBB16_37
@@ -11853,9 +11414,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1764(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 2
; RV32-NEXT: beqz a3, .LBB16_38
@@ -11867,9 +11426,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1638(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 4
; RV32-NEXT: beqz a3, .LBB16_39
@@ -11881,9 +11438,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1512(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 8
; RV32-NEXT: beqz a3, .LBB16_40
@@ -11895,9 +11450,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1386(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 16
; RV32-NEXT: beqz a3, .LBB16_41
@@ -11909,9 +11462,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1260(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 32
; RV32-NEXT: beqz a3, .LBB16_42
@@ -11923,9 +11474,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1134(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 64
; RV32-NEXT: beqz a3, .LBB16_43
@@ -11937,9 +11486,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1008(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 128
; RV32-NEXT: beqz a3, .LBB16_44
@@ -11951,9 +11498,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 882(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 256
; RV32-NEXT: beqz a3, .LBB16_45
@@ -11965,9 +11510,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 756(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 512
; RV32-NEXT: beqz a3, .LBB16_46
@@ -11979,9 +11522,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 630(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a2, 1024
; RV32-NEXT: beqz a3, .LBB16_47
@@ -11993,9 +11534,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 504(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 20
; RV32-NEXT: bgez a3, .LBB16_48
@@ -12007,9 +11546,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 378(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 19
; RV32-NEXT: bgez a3, .LBB16_49
@@ -12021,9 +11558,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 252(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 18
; RV32-NEXT: bgez a3, .LBB16_50
@@ -12035,9 +11570,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 126(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 17
; RV32-NEXT: bltz a3, .LBB16_51
@@ -12050,9 +11583,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 2016(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 15
; RV32-NEXT: bgez a3, .LBB16_54
@@ -12063,9 +11594,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1890(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 14
; RV32-NEXT: bgez a3, .LBB16_55
@@ -12077,9 +11606,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1764(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 13
; RV32-NEXT: bgez a3, .LBB16_56
@@ -12091,9 +11618,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1638(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 12
; RV32-NEXT: bgez a3, .LBB16_57
@@ -12105,9 +11630,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1512(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 11
; RV32-NEXT: bgez a3, .LBB16_58
@@ -12119,9 +11642,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1386(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 10
; RV32-NEXT: bgez a3, .LBB16_59
@@ -12133,9 +11654,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1260(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 9
; RV32-NEXT: bgez a3, .LBB16_60
@@ -12147,9 +11666,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1134(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 8
; RV32-NEXT: bgez a3, .LBB16_61
@@ -12161,9 +11678,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 1008(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 7
; RV32-NEXT: bgez a3, .LBB16_62
@@ -12175,9 +11690,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 882(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 6
; RV32-NEXT: bgez a3, .LBB16_63
@@ -12189,9 +11702,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 756(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 5
; RV32-NEXT: bgez a3, .LBB16_64
@@ -12203,9 +11714,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 630(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 4
; RV32-NEXT: bgez a3, .LBB16_65
@@ -12217,9 +11726,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 504(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 3
; RV32-NEXT: bgez a3, .LBB16_66
@@ -12231,9 +11738,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a3, 378(a4)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a2, 2
; RV32-NEXT: bltz a3, .LBB16_67
@@ -12246,162 +11751,122 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v8, (a5)
; RV32-NEXT: lh a2, 0(a4)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 1
; RV32-NEXT: beqz a2, .LBB16_72
; RV32-NEXT: .LBB16_199: # %cond.store190
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v16, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 2
; RV32-NEXT: beqz a2, .LBB16_73
; RV32-NEXT: .LBB16_200: # %cond.store193
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 1
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 4
; RV32-NEXT: beqz a2, .LBB16_74
; RV32-NEXT: .LBB16_201: # %cond.store196
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 2
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 8
; RV32-NEXT: beqz a2, .LBB16_75
; RV32-NEXT: .LBB16_202: # %cond.store199
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 3
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 16
; RV32-NEXT: beqz a2, .LBB16_76
; RV32-NEXT: .LBB16_203: # %cond.store202
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 4
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 32
; RV32-NEXT: beqz a2, .LBB16_77
; RV32-NEXT: .LBB16_204: # %cond.store205
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 5
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 64
; RV32-NEXT: beqz a2, .LBB16_78
; RV32-NEXT: .LBB16_205: # %cond.store208
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 6
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 128
; RV32-NEXT: beqz a2, .LBB16_79
; RV32-NEXT: .LBB16_206: # %cond.store211
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 7
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 256
; RV32-NEXT: beqz a2, .LBB16_80
; RV32-NEXT: .LBB16_207: # %cond.store214
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 8
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 512
; RV32-NEXT: beqz a2, .LBB16_81
; RV32-NEXT: .LBB16_208: # %cond.store217
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 9
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a2, a3, 1024
; RV32-NEXT: beqz a2, .LBB16_82
; RV32-NEXT: .LBB16_209: # %cond.store220
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 10
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a3, 20
; RV32-NEXT: bgez a2, .LBB16_83
; RV32-NEXT: .LBB16_210: # %cond.store223
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 11
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a3, 19
; RV32-NEXT: bgez a2, .LBB16_84
; RV32-NEXT: .LBB16_211: # %cond.store226
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 12
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a3, 18
; RV32-NEXT: bgez a2, .LBB16_85
; RV32-NEXT: .LBB16_212: # %cond.store229
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 13
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a3, 17
; RV32-NEXT: bgez a2, .LBB16_86
; RV32-NEXT: .LBB16_213: # %cond.store232
; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 14
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a3, 16
-; RV32-NEXT: bgez a2, .LBB16_88
-; RV32-NEXT: j .LBB16_87
+; RV32-NEXT: bltz a2, .LBB16_87
+; RV32-NEXT: j .LBB16_88
; RV32-NEXT: .LBB16_214: # %cond.store238
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 3
@@ -12410,13 +11875,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 2016(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 14
-; RV32-NEXT: bltz a4, .LBB16_215
-; RV32-NEXT: j .LBB16_90
+; RV32-NEXT: bgez a4, .LBB16_90
; RV32-NEXT: .LBB16_215: # %cond.store241
; RV32-NEXT: li a4, 64
; RV32-NEXT: lui a5, 1
@@ -12425,13 +11887,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1890(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 13
-; RV32-NEXT: bltz a4, .LBB16_216
-; RV32-NEXT: j .LBB16_91
+; RV32-NEXT: bgez a4, .LBB16_91
; RV32-NEXT: .LBB16_216: # %cond.store244
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 23
@@ -12440,13 +11899,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1764(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 12
-; RV32-NEXT: bltz a4, .LBB16_217
-; RV32-NEXT: j .LBB16_92
+; RV32-NEXT: bgez a4, .LBB16_92
; RV32-NEXT: .LBB16_217: # %cond.store247
; RV32-NEXT: li a4, 64
; RV32-NEXT: lui a5, 1
@@ -12455,13 +11911,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1638(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 11
-; RV32-NEXT: bltz a4, .LBB16_218
-; RV32-NEXT: j .LBB16_93
+; RV32-NEXT: bgez a4, .LBB16_93
; RV32-NEXT: .LBB16_218: # %cond.store250
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 11
@@ -12470,13 +11923,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1512(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 10
-; RV32-NEXT: bltz a4, .LBB16_219
-; RV32-NEXT: j .LBB16_94
+; RV32-NEXT: bgez a4, .LBB16_94
; RV32-NEXT: .LBB16_219: # %cond.store253
; RV32-NEXT: li a4, 64
; RV32-NEXT: lui a5, 1
@@ -12485,13 +11935,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1386(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 9
-; RV32-NEXT: bltz a4, .LBB16_220
-; RV32-NEXT: j .LBB16_95
+; RV32-NEXT: bgez a4, .LBB16_95
; RV32-NEXT: .LBB16_220: # %cond.store256
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 21
@@ -12500,13 +11947,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1260(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 8
-; RV32-NEXT: bltz a4, .LBB16_221
-; RV32-NEXT: j .LBB16_96
+; RV32-NEXT: bgez a4, .LBB16_96
; RV32-NEXT: .LBB16_221: # %cond.store259
; RV32-NEXT: li a4, 64
; RV32-NEXT: lui a5, 1
@@ -12515,13 +11959,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1134(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 7
-; RV32-NEXT: bltz a4, .LBB16_222
-; RV32-NEXT: j .LBB16_97
+; RV32-NEXT: bgez a4, .LBB16_97
; RV32-NEXT: .LBB16_222: # %cond.store262
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 5
@@ -12530,13 +11971,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 1008(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 6
-; RV32-NEXT: bltz a4, .LBB16_223
-; RV32-NEXT: j .LBB16_98
+; RV32-NEXT: bgez a4, .LBB16_98
; RV32-NEXT: .LBB16_223: # %cond.store265
; RV32-NEXT: li a4, 64
; RV32-NEXT: lui a5, 1
@@ -12545,13 +11983,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 882(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 5
-; RV32-NEXT: bltz a4, .LBB16_224
-; RV32-NEXT: j .LBB16_99
+; RV32-NEXT: bgez a4, .LBB16_99
; RV32-NEXT: .LBB16_224: # %cond.store268
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 19
@@ -12560,13 +11995,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 756(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 4
-; RV32-NEXT: bltz a4, .LBB16_225
-; RV32-NEXT: j .LBB16_100
+; RV32-NEXT: bgez a4, .LBB16_100
; RV32-NEXT: .LBB16_225: # %cond.store271
; RV32-NEXT: li a4, 64
; RV32-NEXT: lui a5, 1
@@ -12575,13 +12007,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 630(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 3
-; RV32-NEXT: bltz a4, .LBB16_226
-; RV32-NEXT: j .LBB16_101
+; RV32-NEXT: bgez a4, .LBB16_101
; RV32-NEXT: .LBB16_226: # %cond.store274
; RV32-NEXT: li a4, 64
; RV32-NEXT: li a5, 9
@@ -12590,14 +12019,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a5)
; RV32-NEXT: lh a4, 504(a2)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sh a4, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a4, a3, 2
-; RV32-NEXT: bgez a4, .LBB16_257
-; RV32-NEXT: j .LBB16_102
-; RV32-NEXT: .LBB16_257: # %cond.store274
+; RV32-NEXT: bltz a4, .LBB16_102
; RV32-NEXT: j .LBB16_103
; RV32-NEXT: .LBB16_227: # %cond.store289
; RV32-NEXT: li a3, 64
@@ -12606,13 +12031,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 2016(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 4
-; RV32-NEXT: bnez a3, .LBB16_228
-; RV32-NEXT: j .LBB16_111
+; RV32-NEXT: beqz a3, .LBB16_111
; RV32-NEXT: .LBB16_228: # %cond.store292
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12620,13 +12042,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1890(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 8
-; RV32-NEXT: bnez a3, .LBB16_229
-; RV32-NEXT: j .LBB16_112
+; RV32-NEXT: beqz a3, .LBB16_112
; RV32-NEXT: .LBB16_229: # %cond.store295
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12634,13 +12053,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1764(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 16
-; RV32-NEXT: bnez a3, .LBB16_230
-; RV32-NEXT: j .LBB16_113
+; RV32-NEXT: beqz a3, .LBB16_113
; RV32-NEXT: .LBB16_230: # %cond.store298
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12648,13 +12064,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1638(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 32
-; RV32-NEXT: bnez a3, .LBB16_231
-; RV32-NEXT: j .LBB16_114
+; RV32-NEXT: beqz a3, .LBB16_114
; RV32-NEXT: .LBB16_231: # %cond.store301
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12662,13 +12075,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1512(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 64
-; RV32-NEXT: bnez a3, .LBB16_232
-; RV32-NEXT: j .LBB16_115
+; RV32-NEXT: beqz a3, .LBB16_115
; RV32-NEXT: .LBB16_232: # %cond.store304
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12676,13 +12086,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1386(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 128
-; RV32-NEXT: bnez a3, .LBB16_233
-; RV32-NEXT: j .LBB16_116
+; RV32-NEXT: beqz a3, .LBB16_116
; RV32-NEXT: .LBB16_233: # %cond.store307
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12690,13 +12097,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1260(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 256
-; RV32-NEXT: bnez a3, .LBB16_234
-; RV32-NEXT: j .LBB16_117
+; RV32-NEXT: beqz a3, .LBB16_117
; RV32-NEXT: .LBB16_234: # %cond.store310
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12704,13 +12108,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1134(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 512
-; RV32-NEXT: bnez a3, .LBB16_235
-; RV32-NEXT: j .LBB16_118
+; RV32-NEXT: beqz a3, .LBB16_118
; RV32-NEXT: .LBB16_235: # %cond.store313
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12718,13 +12119,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 1008(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: andi a3, a1, 1024
-; RV32-NEXT: bnez a3, .LBB16_236
-; RV32-NEXT: j .LBB16_119
+; RV32-NEXT: beqz a3, .LBB16_119
; RV32-NEXT: .LBB16_236: # %cond.store316
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12732,13 +12130,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 882(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 20
-; RV32-NEXT: bltz a3, .LBB16_237
-; RV32-NEXT: j .LBB16_120
+; RV32-NEXT: bgez a3, .LBB16_120
; RV32-NEXT: .LBB16_237: # %cond.store319
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12746,13 +12141,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 756(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 19
-; RV32-NEXT: bltz a3, .LBB16_238
-; RV32-NEXT: j .LBB16_121
+; RV32-NEXT: bgez a3, .LBB16_121
; RV32-NEXT: .LBB16_238: # %cond.store322
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12760,13 +12152,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 630(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 18
-; RV32-NEXT: bltz a3, .LBB16_239
-; RV32-NEXT: j .LBB16_122
+; RV32-NEXT: bgez a3, .LBB16_122
; RV32-NEXT: .LBB16_239: # %cond.store325
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12774,13 +12163,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 504(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 17
-; RV32-NEXT: bltz a3, .LBB16_240
-; RV32-NEXT: j .LBB16_123
+; RV32-NEXT: bgez a3, .LBB16_123
; RV32-NEXT: .LBB16_240: # %cond.store328
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12788,13 +12174,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 378(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 16
-; RV32-NEXT: bltz a3, .LBB16_241
-; RV32-NEXT: j .LBB16_124
+; RV32-NEXT: bgez a3, .LBB16_124
; RV32-NEXT: .LBB16_241: # %cond.store331
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12802,13 +12185,10 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 252(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 15
-; RV32-NEXT: bltz a3, .LBB16_242
-; RV32-NEXT: j .LBB16_125
+; RV32-NEXT: bgez a3, .LBB16_125
; RV32-NEXT: .LBB16_242: # %cond.store334
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 2047
@@ -12816,35 +12196,27 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a3, 126(a2)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sh a3, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a3, a1, 14
-; RV32-NEXT: bltz a3, .LBB16_243
-; RV32-NEXT: j .LBB16_126
+; RV32-NEXT: bgez a3, .LBB16_126
; RV32-NEXT: .LBB16_243: # %cond.store337
; RV32-NEXT: li a3, 64
; RV32-NEXT: addi a4, sp, 1920
; RV32-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a4)
; RV32-NEXT: lh a2, 0(a2)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 13
-; RV32-NEXT: bltz a2, .LBB16_244
-; RV32-NEXT: j .LBB16_127
+; RV32-NEXT: bgez a2, .LBB16_127
; RV32-NEXT: .LBB16_244: # %cond.store340
; RV32-NEXT: li a2, 64
; RV32-NEXT: addi a3, sp, 1792
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1892(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 12
; RV32-NEXT: bltz a2, .LBB16_245
@@ -12855,9 +12227,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1766(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 11
; RV32-NEXT: bltz a2, .LBB16_246
@@ -12868,9 +12238,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1640(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 10
; RV32-NEXT: bltz a2, .LBB16_247
@@ -12881,9 +12249,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1514(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 9
; RV32-NEXT: bltz a2, .LBB16_248
@@ -12894,9 +12260,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1388(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 8
; RV32-NEXT: bltz a2, .LBB16_249
@@ -12907,9 +12271,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1262(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 7
; RV32-NEXT: bltz a2, .LBB16_250
@@ -12920,9 +12282,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1136(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 6
; RV32-NEXT: bltz a2, .LBB16_251
@@ -12933,9 +12293,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 1010(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 5
; RV32-NEXT: bltz a2, .LBB16_252
@@ -12946,9 +12304,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 884(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 4
; RV32-NEXT: bltz a2, .LBB16_253
@@ -12959,9 +12315,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 758(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 3
; RV32-NEXT: bltz a2, .LBB16_254
@@ -12972,9 +12326,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 632(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 2
; RV32-NEXT: bltz a2, .LBB16_255
@@ -12985,9 +12337,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 506(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: bltz a2, .LBB16_256
@@ -12998,13 +12348,11 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV32-NEXT: vse16.v v16, (a3)
; RV32-NEXT: lh a2, 380(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sh a2, 0(a0)
; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: bgez a1, .LBB16_258
+; RV32-NEXT: bgez a1, .LBB16_257
; RV32-NEXT: j .LBB16_140
-; RV32-NEXT: .LBB16_258: # %cond.store376
+; RV32-NEXT: .LBB16_257: # %cond.store376
; RV32-NEXT: j .LBB16_141
entry:
tail call void @llvm.masked.compressstore.v128i16(<128 x i16> %data, ptr align 2 %p, <128 x i1> %mask)
@@ -13184,14 +12532,8 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: .LBB23_8: # %cond.store19
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 7
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: .LBB23_9: # %else20
; RV64-NEXT: addi sp, sp, -2032
@@ -13218,13 +12560,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: lui a2, 2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: lw a2, -2016(a2)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: .LBB23_11: # %else23
; RV64-NEXT: andi a3, a1, 512
@@ -13286,13 +12622,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a2, 0(a2)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: .LBB23_29: # %else74
; RV64-NEXT: slli a3, a1, 37
@@ -13414,13 +12744,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a2)
; RV64-NEXT: lw a1, 252(sp)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a2, a1, 24
-; RV64-NEXT: sb a2, 3(a0)
-; RV64-NEXT: srli a2, a1, 16
-; RV64-NEXT: sb a2, 2(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sw a1, 0(a0)
; RV64-NEXT: .LBB23_68: # %else188
; RV64-NEXT: li a0, 25
; RV64-NEXT: slli a0, a0, 8
@@ -13433,99 +12757,53 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
; RV64-NEXT: .LBB23_69: # %cond.store
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 2
; RV64-NEXT: beqz a2, .LBB23_2
; RV64-NEXT: .LBB23_70: # %cond.store1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 1
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 4
; RV64-NEXT: beqz a2, .LBB23_3
; RV64-NEXT: .LBB23_71: # %cond.store4
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 2
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 8
; RV64-NEXT: beqz a2, .LBB23_4
; RV64-NEXT: .LBB23_72: # %cond.store7
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 3
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 16
; RV64-NEXT: beqz a2, .LBB23_5
; RV64-NEXT: .LBB23_73: # %cond.store10
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 4
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 32
; RV64-NEXT: beqz a2, .LBB23_6
; RV64-NEXT: .LBB23_74: # %cond.store13
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 5
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 64
; RV64-NEXT: beqz a2, .LBB23_7
; RV64-NEXT: .LBB23_75: # %cond.store16
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 6
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a2, a1, 128
; RV64-NEXT: bnez a2, .LBB23_8
@@ -13538,13 +12816,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1984(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: andi a3, a1, 1024
; RV64-NEXT: beqz a3, .LBB23_13
@@ -13556,13 +12828,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1860(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 52
; RV64-NEXT: bgez a3, .LBB23_14
@@ -13574,13 +12840,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1736(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 51
; RV64-NEXT: bgez a3, .LBB23_15
@@ -13592,13 +12852,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1612(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 50
; RV64-NEXT: bgez a3, .LBB23_16
@@ -13610,13 +12864,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1488(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 49
; RV64-NEXT: bgez a3, .LBB23_17
@@ -13628,13 +12876,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1364(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 48
; RV64-NEXT: bgez a3, .LBB23_18
@@ -13646,13 +12888,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1240(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 47
; RV64-NEXT: bgez a3, .LBB23_19
@@ -13664,13 +12900,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1116(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 46
; RV64-NEXT: bgez a3, .LBB23_20
@@ -13682,13 +12912,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 992(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 45
; RV64-NEXT: bgez a3, .LBB23_21
@@ -13700,13 +12924,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 868(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 44
; RV64-NEXT: bgez a3, .LBB23_22
@@ -13718,13 +12936,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 744(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 43
; RV64-NEXT: bgez a3, .LBB23_23
@@ -13736,13 +12948,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 620(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 42
; RV64-NEXT: bgez a3, .LBB23_24
@@ -13754,13 +12960,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 496(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 41
; RV64-NEXT: bgez a3, .LBB23_25
@@ -13772,13 +12972,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 372(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 40
; RV64-NEXT: bgez a3, .LBB23_26
@@ -13790,13 +12984,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 248(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 39
; RV64-NEXT: bgez a3, .LBB23_27
@@ -13807,13 +12995,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 124(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 38
; RV64-NEXT: bltz a3, .LBB23_28
@@ -13825,13 +13007,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1956(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 36
; RV64-NEXT: bgez a3, .LBB23_31
@@ -13842,13 +13018,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1832(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 35
; RV64-NEXT: bgez a3, .LBB23_32
@@ -13859,13 +13029,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1708(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 34
; RV64-NEXT: bgez a3, .LBB23_33
@@ -13876,13 +13040,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1584(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 33
; RV64-NEXT: bgez a3, .LBB23_34
@@ -13893,13 +13051,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1460(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 32
; RV64-NEXT: bgez a3, .LBB23_35
@@ -13910,124 +13062,66 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: lw a3, 1336(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 31
; RV64-NEXT: bgez a3, .LBB23_36
; RV64-NEXT: .LBB23_98: # %cond.store94
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.x.s a3, v16
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vse32.v v16, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 30
; RV64-NEXT: bgez a3, .LBB23_37
; RV64-NEXT: .LBB23_99: # %cond.store97
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 1
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 29
; RV64-NEXT: bgez a3, .LBB23_38
; RV64-NEXT: .LBB23_100: # %cond.store100
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 2
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 28
; RV64-NEXT: bgez a3, .LBB23_39
; RV64-NEXT: .LBB23_101: # %cond.store103
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 3
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 27
; RV64-NEXT: bgez a3, .LBB23_40
; RV64-NEXT: .LBB23_102: # %cond.store106
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 4
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 26
; RV64-NEXT: bgez a3, .LBB23_41
; RV64-NEXT: .LBB23_103: # %cond.store109
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 5
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 25
; RV64-NEXT: bgez a3, .LBB23_42
; RV64-NEXT: .LBB23_104: # %cond.store112
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 6
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 24
; RV64-NEXT: bgez a3, .LBB23_43
; RV64-NEXT: .LBB23_105: # %cond.store115
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 7
-; RV64-NEXT: vmv.x.s a3, v8
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 23
; RV64-NEXT: bgez a3, .LBB23_44
@@ -14038,13 +13132,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 1116(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 22
; RV64-NEXT: bgez a3, .LBB23_45
@@ -14055,13 +13143,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 992(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 21
; RV64-NEXT: bgez a3, .LBB23_46
@@ -14072,13 +13154,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 868(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 20
; RV64-NEXT: bgez a3, .LBB23_47
@@ -14089,13 +13165,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 744(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 19
; RV64-NEXT: bgez a3, .LBB23_48
@@ -14106,13 +13176,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 620(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 18
; RV64-NEXT: bgez a3, .LBB23_49
@@ -14123,13 +13187,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 496(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 17
; RV64-NEXT: bgez a3, .LBB23_50
@@ -14140,13 +13198,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 372(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 16
; RV64-NEXT: bgez a3, .LBB23_51
@@ -14157,13 +13209,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 248(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 15
; RV64-NEXT: bgez a3, .LBB23_52
@@ -14174,13 +13220,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a3, 124(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a3, a1, 14
; RV64-NEXT: bgez a3, .LBB23_53
@@ -14190,13 +13230,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a4)
; RV64-NEXT: lw a2, 0(a2)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 13
; RV64-NEXT: bgez a2, .LBB23_54
@@ -14206,13 +13240,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1864(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 12
; RV64-NEXT: bgez a2, .LBB23_55
@@ -14222,13 +13250,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1740(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 11
; RV64-NEXT: bgez a2, .LBB23_56
@@ -14238,13 +13260,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1616(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 10
; RV64-NEXT: bgez a2, .LBB23_57
@@ -14254,13 +13270,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1492(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 9
; RV64-NEXT: bgez a2, .LBB23_58
@@ -14270,13 +13280,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1368(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 8
; RV64-NEXT: bgez a2, .LBB23_59
@@ -14286,13 +13290,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1244(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 7
; RV64-NEXT: bgez a2, .LBB23_60
@@ -14302,13 +13300,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 1120(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 6
; RV64-NEXT: bgez a2, .LBB23_61
@@ -14318,13 +13310,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 996(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 5
; RV64-NEXT: bgez a2, .LBB23_62
@@ -14334,13 +13320,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 872(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 4
; RV64-NEXT: bgez a2, .LBB23_63
@@ -14350,13 +13330,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 748(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 3
; RV64-NEXT: bgez a2, .LBB23_64
@@ -14366,13 +13340,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 624(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 2
; RV64-NEXT: bgez a2, .LBB23_65
@@ -14382,13 +13350,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 500(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: slli a2, a1, 1
; RV64-NEXT: bgez a2, .LBB23_66
@@ -14398,13 +13360,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: lw a2, 376(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sw a2, 0(a0)
; RV64-NEXT: addi a0, a0, 4
; RV64-NEXT: bltz a1, .LBB23_67
; RV64-NEXT: j .LBB23_68
@@ -14439,14 +13395,8 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: .LBB23_8: # %cond.store19
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 7
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: .LBB23_9: # %else20
; RV32-NEXT: addi sp, sp, -2032
@@ -14473,13 +13423,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: lui a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: lw a1, -2016(a1)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: .LBB23_11: # %else23
; RV32-NEXT: andi a3, a2, 512
@@ -14541,13 +13485,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a1, 0(a1)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: .LBB23_29: # %else74
; RV32-NEXT: slli a1, a2, 5
@@ -14566,13 +13504,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a1, 1708(a3)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a4, a1, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a1, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: .LBB23_33: # %else83
; RV32-NEXT: slli a4, a2, 2
@@ -14585,13 +13517,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a5)
; RV32-NEXT: lw a4, 1584(a3)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a5, a4, 24
-; RV32-NEXT: sb a5, 3(a0)
-; RV32-NEXT: srli a5, a4, 16
-; RV32-NEXT: sb a5, 2(a0)
-; RV32-NEXT: srli a4, a4, 8
-; RV32-NEXT: sb a4, 1(a0)
+; RV32-NEXT: sw a4, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: .LBB23_35: # %else86
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -14605,13 +13531,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a1, 1460(a3)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a4, a1, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a1, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: .LBB23_37: # %else89
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -14718,13 +13638,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a2)
; RV32-NEXT: lw a1, 252(sp)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a2, a1, 24
-; RV32-NEXT: sb a2, 3(a0)
-; RV32-NEXT: srli a2, a1, 16
-; RV32-NEXT: sb a2, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: .LBB23_71: # %else188
; RV32-NEXT: li a0, 25
; RV32-NEXT: slli a0, a0, 8
@@ -14737,99 +13651,53 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
; RV32-NEXT: .LBB23_72: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 2
; RV32-NEXT: beqz a1, .LBB23_2
; RV32-NEXT: .LBB23_73: # %cond.store1
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 1
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 4
; RV32-NEXT: beqz a1, .LBB23_3
; RV32-NEXT: .LBB23_74: # %cond.store4
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 2
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 8
; RV32-NEXT: beqz a1, .LBB23_4
; RV32-NEXT: .LBB23_75: # %cond.store7
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 3
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 16
; RV32-NEXT: beqz a1, .LBB23_5
; RV32-NEXT: .LBB23_76: # %cond.store10
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 4
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 32
; RV32-NEXT: beqz a1, .LBB23_6
; RV32-NEXT: .LBB23_77: # %cond.store13
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 5
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 64
; RV32-NEXT: beqz a1, .LBB23_7
; RV32-NEXT: .LBB23_78: # %cond.store16
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 6
-; RV32-NEXT: vmv.x.s a1, v24
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a3, a1, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a1, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a1, a2, 128
; RV32-NEXT: bnez a1, .LBB23_8
@@ -14842,13 +13710,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1984(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a3, a2, 1024
; RV32-NEXT: beqz a3, .LBB23_13
@@ -14860,13 +13722,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1860(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 20
; RV32-NEXT: bgez a3, .LBB23_14
@@ -14878,13 +13734,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1736(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 19
; RV32-NEXT: bgez a3, .LBB23_15
@@ -14896,13 +13746,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1612(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 18
; RV32-NEXT: bgez a3, .LBB23_16
@@ -14914,13 +13758,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1488(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 17
; RV32-NEXT: bgez a3, .LBB23_17
@@ -14932,13 +13770,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1364(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 16
; RV32-NEXT: bgez a3, .LBB23_18
@@ -14950,13 +13782,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1240(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 15
; RV32-NEXT: bgez a3, .LBB23_19
@@ -14968,13 +13794,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 1116(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 14
; RV32-NEXT: bgez a3, .LBB23_20
@@ -14986,13 +13806,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 992(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 13
; RV32-NEXT: bgez a3, .LBB23_21
@@ -15004,13 +13818,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 868(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 12
; RV32-NEXT: bgez a3, .LBB23_22
@@ -15022,13 +13830,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 744(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 11
; RV32-NEXT: bgez a3, .LBB23_23
@@ -15040,13 +13842,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 620(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 10
; RV32-NEXT: bgez a3, .LBB23_24
@@ -15058,13 +13854,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 496(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 9
; RV32-NEXT: bgez a3, .LBB23_25
@@ -15076,13 +13866,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 372(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 8
; RV32-NEXT: bgez a3, .LBB23_26
@@ -15094,13 +13878,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 248(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 7
; RV32-NEXT: bgez a3, .LBB23_27
@@ -15111,13 +13889,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a3, 124(a1)
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a3, a2, 6
; RV32-NEXT: bltz a3, .LBB23_28
@@ -15129,13 +13901,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a1, 1956(a3)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a4, a1, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a1, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a1, a2, 4
; RV32-NEXT: bgez a1, .LBB23_31
@@ -15146,13 +13912,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a1, 1832(a3)
-; RV32-NEXT: sb a1, 0(a0)
-; RV32-NEXT: srli a4, a1, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a1, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a1, a2, 3
; RV32-NEXT: bltz a1, .LBB23_32
@@ -15164,124 +13924,66 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: lw a2, 1336(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 1
; RV32-NEXT: beqz a2, .LBB23_39
; RV32-NEXT: .LBB23_98: # %cond.store94
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse32.v v16, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 2
; RV32-NEXT: beqz a2, .LBB23_40
; RV32-NEXT: .LBB23_99: # %cond.store97
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 1
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 4
; RV32-NEXT: beqz a2, .LBB23_41
; RV32-NEXT: .LBB23_100: # %cond.store100
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 2
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 8
; RV32-NEXT: beqz a2, .LBB23_42
; RV32-NEXT: .LBB23_101: # %cond.store103
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 3
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 16
; RV32-NEXT: beqz a2, .LBB23_43
; RV32-NEXT: .LBB23_102: # %cond.store106
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 4
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 32
; RV32-NEXT: beqz a2, .LBB23_44
; RV32-NEXT: .LBB23_103: # %cond.store109
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 5
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 64
; RV32-NEXT: beqz a2, .LBB23_45
; RV32-NEXT: .LBB23_104: # %cond.store112
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 6
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 128
; RV32-NEXT: beqz a2, .LBB23_46
; RV32-NEXT: .LBB23_105: # %cond.store115
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v16, 7
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 256
; RV32-NEXT: beqz a2, .LBB23_47
@@ -15292,13 +13994,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 1116(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 512
; RV32-NEXT: beqz a2, .LBB23_48
@@ -15309,13 +14005,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 992(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: andi a2, a1, 1024
; RV32-NEXT: beqz a2, .LBB23_49
@@ -15326,13 +14016,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 868(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 20
; RV32-NEXT: bgez a2, .LBB23_50
@@ -15343,13 +14027,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 744(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 19
; RV32-NEXT: bgez a2, .LBB23_51
@@ -15360,13 +14038,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 620(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 18
; RV32-NEXT: bgez a2, .LBB23_52
@@ -15377,13 +14049,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 496(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 17
; RV32-NEXT: bgez a2, .LBB23_53
@@ -15394,13 +14060,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 372(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 16
; RV32-NEXT: bgez a2, .LBB23_54
@@ -15411,13 +14071,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 248(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 15
; RV32-NEXT: bgez a2, .LBB23_55
@@ -15428,13 +14082,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 124(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a4, a2, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a2, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 14
; RV32-NEXT: bgez a2, .LBB23_56
@@ -15444,13 +14092,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a4)
; RV32-NEXT: lw a2, 0(a3)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 13
; RV32-NEXT: bgez a2, .LBB23_57
@@ -15460,13 +14102,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1864(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 12
; RV32-NEXT: bgez a2, .LBB23_58
@@ -15476,13 +14112,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1740(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 11
; RV32-NEXT: bgez a2, .LBB23_59
@@ -15492,13 +14122,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1616(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 10
; RV32-NEXT: bgez a2, .LBB23_60
@@ -15508,13 +14132,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1492(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 9
; RV32-NEXT: bgez a2, .LBB23_61
@@ -15524,13 +14142,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1368(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 8
; RV32-NEXT: bgez a2, .LBB23_62
@@ -15540,13 +14152,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1244(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 7
; RV32-NEXT: bgez a2, .LBB23_63
@@ -15556,13 +14162,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 1120(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 6
; RV32-NEXT: bgez a2, .LBB23_64
@@ -15572,13 +14172,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 996(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 5
; RV32-NEXT: bgez a2, .LBB23_65
@@ -15588,13 +14182,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 872(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 4
; RV32-NEXT: bgez a2, .LBB23_66
@@ -15604,13 +14192,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 748(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 3
; RV32-NEXT: bgez a2, .LBB23_67
@@ -15620,13 +14202,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 624(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 2
; RV32-NEXT: bgez a2, .LBB23_68
@@ -15636,13 +14212,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 500(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: bgez a2, .LBB23_69
@@ -15652,13 +14222,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: lw a2, 376(sp)
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: bltz a1, .LBB23_70
; RV32-NEXT: j .LBB23_71
@@ -15803,22 +14367,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: .LBB29_4: # %cond.store7
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 3
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v24, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: .LBB29_5: # %else8
; RV64-NEXT: addi sp, sp, -2032
@@ -15917,21 +14467,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 368(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: .LBB29_33: # %else89
; RV64-NEXT: lui a2, 524288
@@ -15942,21 +14478,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a1)
; RV64-NEXT: ld a1, 248(sp)
-; RV64-NEXT: sb a1, 0(a0)
-; RV64-NEXT: srli a2, a1, 56
-; RV64-NEXT: sb a2, 7(a0)
-; RV64-NEXT: srli a2, a1, 48
-; RV64-NEXT: sb a2, 6(a0)
-; RV64-NEXT: srli a2, a1, 40
-; RV64-NEXT: sb a2, 5(a0)
-; RV64-NEXT: srli a2, a1, 32
-; RV64-NEXT: sb a2, 4(a0)
-; RV64-NEXT: srli a2, a1, 24
-; RV64-NEXT: sb a2, 3(a0)
-; RV64-NEXT: srli a2, a1, 16
-; RV64-NEXT: sb a2, 2(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sd a1, 0(a0)
; RV64-NEXT: .LBB29_35: # %else92
; RV64-NEXT: addi sp, s0, -2048
; RV64-NEXT: addi sp, sp, -1280
@@ -15966,67 +14488,23 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
; RV64-NEXT: .LBB29_36: # %cond.store
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a2, a1, 2
; RV64-NEXT: beqz a2, .LBB29_2
; RV64-NEXT: .LBB29_37: # %cond.store1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 1
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse64.v v24, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a2, a1, 4
; RV64-NEXT: beqz a2, .LBB29_3
; RV64-NEXT: .LBB29_38: # %cond.store4
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v24, v8, 2
-; RV64-NEXT: vmv.x.s a2, v24
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v24, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a2, a1, 8
; RV64-NEXT: bnez a2, .LBB29_4
@@ -16037,21 +14515,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 1080(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a3, a1, 32
; RV64-NEXT: beqz a3, .LBB29_7
@@ -16061,21 +14525,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 960(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a3, a1, 64
; RV64-NEXT: beqz a3, .LBB29_8
@@ -16085,21 +14535,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 840(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a3, a1, 128
; RV64-NEXT: beqz a3, .LBB29_9
@@ -16109,21 +14545,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 720(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a3, a1, 256
; RV64-NEXT: beqz a3, .LBB29_10
@@ -16133,21 +14555,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 600(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a3, a1, 512
; RV64-NEXT: beqz a3, .LBB29_11
@@ -16157,21 +14565,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 480(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: andi a3, a1, 1024
; RV64-NEXT: beqz a3, .LBB29_12
@@ -16181,21 +14575,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 360(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a3, a1, 52
; RV64-NEXT: bgez a3, .LBB29_13
@@ -16205,21 +14585,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 240(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a3, a1, 51
; RV64-NEXT: bgez a3, .LBB29_14
@@ -16229,21 +14595,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a3, 120(a2)
-; RV64-NEXT: sb a3, 0(a0)
-; RV64-NEXT: srli a4, a3, 56
-; RV64-NEXT: sb a4, 7(a0)
-; RV64-NEXT: srli a4, a3, 48
-; RV64-NEXT: sb a4, 6(a0)
-; RV64-NEXT: srli a4, a3, 40
-; RV64-NEXT: sb a4, 5(a0)
-; RV64-NEXT: srli a4, a3, 32
-; RV64-NEXT: sb a4, 4(a0)
-; RV64-NEXT: srli a4, a3, 24
-; RV64-NEXT: sb a4, 3(a0)
-; RV64-NEXT: srli a4, a3, 16
-; RV64-NEXT: sb a4, 2(a0)
-; RV64-NEXT: srli a3, a3, 8
-; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sd a3, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a3, a1, 50
; RV64-NEXT: bgez a3, .LBB29_15
@@ -16252,21 +14604,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a3)
; RV64-NEXT: ld a2, 0(a2)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 49
; RV64-NEXT: bgez a2, .LBB29_16
@@ -16275,21 +14613,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a2)
; RV64-NEXT: ld a2, 1904(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 48
; RV64-NEXT: bgez a2, .LBB29_17
@@ -16298,108 +14622,36 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a2)
; RV64-NEXT: ld a2, 1784(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 47
; RV64-NEXT: bgez a2, .LBB29_18
; RV64-NEXT: .LBB29_51: # %cond.store46
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a2, v16
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse64.v v16, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 46
; RV64-NEXT: bgez a2, .LBB29_19
; RV64-NEXT: .LBB29_52: # %cond.store49
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 1
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 45
; RV64-NEXT: bgez a2, .LBB29_20
; RV64-NEXT: .LBB29_53: # %cond.store52
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 2
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 44
; RV64-NEXT: bgez a2, .LBB29_21
; RV64-NEXT: .LBB29_54: # %cond.store55
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v16, 3
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 43
; RV64-NEXT: bgez a2, .LBB29_22
@@ -16408,21 +14660,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 1568(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 42
; RV64-NEXT: bgez a2, .LBB29_23
@@ -16431,21 +14669,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 1448(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 41
; RV64-NEXT: bgez a2, .LBB29_24
@@ -16454,21 +14678,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 1328(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 40
; RV64-NEXT: bgez a2, .LBB29_25
@@ -16477,21 +14687,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 1208(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 39
; RV64-NEXT: bgez a2, .LBB29_26
@@ -16500,21 +14696,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 1088(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 38
; RV64-NEXT: bgez a2, .LBB29_27
@@ -16523,21 +14705,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 968(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 37
; RV64-NEXT: bgez a2, .LBB29_28
@@ -16546,21 +14714,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 848(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 36
; RV64-NEXT: bgez a2, .LBB29_29
@@ -16569,21 +14723,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 728(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 35
; RV64-NEXT: bgez a2, .LBB29_30
@@ -16592,21 +14732,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 608(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 34
; RV64-NEXT: bgez a2, .LBB29_31
@@ -16615,21 +14741,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v16, (a2)
; RV64-NEXT: ld a2, 488(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a2, 56
-; RV64-NEXT: sb a3, 7(a0)
-; RV64-NEXT: srli a3, a2, 48
-; RV64-NEXT: sb a3, 6(a0)
-; RV64-NEXT: srli a3, a2, 40
-; RV64-NEXT: sb a3, 5(a0)
-; RV64-NEXT: srli a3, a2, 32
-; RV64-NEXT: sb a3, 4(a0)
-; RV64-NEXT: srli a3, a2, 24
-; RV64-NEXT: sb a3, 3(a0)
-; RV64-NEXT: srli a3, a2, 16
-; RV64-NEXT: sb a3, 2(a0)
-; RV64-NEXT: srli a2, a2, 8
-; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: addi a0, a0, 8
; RV64-NEXT: slli a2, a1, 33
; RV64-NEXT: bltz a2, .LBB29_32
@@ -16741,20 +14853,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 2
; RV32-NEXT: beqz a2, .LBB29_2
@@ -16765,20 +14865,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 4
; RV32-NEXT: beqz a2, .LBB29_3
@@ -16789,20 +14877,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 8
; RV32-NEXT: beqz a2, .LBB29_4
@@ -16813,20 +14889,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 16
; RV32-NEXT: beqz a2, .LBB29_5
@@ -16837,20 +14901,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 32
; RV32-NEXT: beqz a2, .LBB29_6
@@ -16861,20 +14913,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 64
; RV32-NEXT: beqz a2, .LBB29_7
@@ -16885,20 +14925,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 128
; RV32-NEXT: beqz a2, .LBB29_8
@@ -16909,20 +14937,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 256
; RV32-NEXT: beqz a2, .LBB29_9
@@ -16933,20 +14949,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 512
; RV32-NEXT: beqz a2, .LBB29_10
@@ -16957,20 +14961,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: andi a2, a1, 1024
; RV32-NEXT: beqz a2, .LBB29_11
@@ -16981,20 +14973,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 20
; RV32-NEXT: bgez a2, .LBB29_12
@@ -17005,20 +14985,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 19
; RV32-NEXT: bgez a2, .LBB29_13
@@ -17029,20 +14997,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 18
; RV32-NEXT: bgez a2, .LBB29_14
@@ -17053,20 +15009,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 17
; RV32-NEXT: bgez a2, .LBB29_15
@@ -17077,20 +15021,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v0, v24, a2
; RV32-NEXT: vmv.x.s a2, v0
; RV32-NEXT: vmv.x.s a3, v24
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 16
; RV32-NEXT: bgez a2, .LBB29_16
@@ -17101,20 +15033,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 15
; RV32-NEXT: bgez a2, .LBB29_17
@@ -17124,20 +15044,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v8, v16, a2
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: vmv.x.s a3, v16
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 14
; RV32-NEXT: bgez a2, .LBB29_18
@@ -17148,20 +15056,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 13
; RV32-NEXT: bgez a2, .LBB29_19
@@ -17172,20 +15068,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 12
; RV32-NEXT: bgez a2, .LBB29_20
@@ -17196,20 +15080,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 11
; RV32-NEXT: bgez a2, .LBB29_21
@@ -17220,20 +15092,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 10
; RV32-NEXT: bgez a2, .LBB29_22
@@ -17244,20 +15104,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 9
; RV32-NEXT: bgez a2, .LBB29_23
@@ -17268,20 +15116,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 8
; RV32-NEXT: bgez a2, .LBB29_24
@@ -17292,20 +15128,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 7
; RV32-NEXT: bgez a2, .LBB29_25
@@ -17316,20 +15140,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 6
; RV32-NEXT: bgez a2, .LBB29_26
@@ -17340,20 +15152,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 5
; RV32-NEXT: bgez a2, .LBB29_27
@@ -17364,20 +15164,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 4
; RV32-NEXT: bgez a2, .LBB29_28
@@ -17388,20 +15176,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 3
; RV32-NEXT: bgez a2, .LBB29_29
@@ -17412,20 +15188,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 2
; RV32-NEXT: bgez a2, .LBB29_30
@@ -17436,20 +15200,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: bgez a2, .LBB29_31
@@ -17460,20 +15212,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v24, v8, a2
; RV32-NEXT: vmv.x.s a2, v24
; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sb a3, 0(a0)
-; RV32-NEXT: sb a2, 4(a0)
-; RV32-NEXT: srli a4, a3, 24
-; RV32-NEXT: sb a4, 3(a0)
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: sb a4, 2(a0)
-; RV32-NEXT: srli a3, a3, 8
-; RV32-NEXT: sb a3, 1(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 7(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 6(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: addi a0, a0, 8
; RV32-NEXT: bgez a1, .LBB29_32
; RV32-NEXT: .LBB29_64: # %cond.store91
@@ -17483,20 +15223,8 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV32-NEXT: vsrl.vx v16, v8, a1
; RV32-NEXT: vmv.x.s a1, v16
; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sb a2, 0(a0)
-; RV32-NEXT: sb a1, 4(a0)
-; RV32-NEXT: srli a3, a2, 24
-; RV32-NEXT: sb a3, 3(a0)
-; RV32-NEXT: srli a3, a2, 16
-; RV32-NEXT: sb a3, 2(a0)
-; RV32-NEXT: srli a2, a2, 8
-; RV32-NEXT: sb a2, 1(a0)
-; RV32-NEXT: srli a2, a1, 24
-; RV32-NEXT: sb a2, 7(a0)
-; RV32-NEXT: srli a2, a1, 16
-; RV32-NEXT: sb a2, 6(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: ret
entry:
tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
index 52c52921e7e1d2..36fbdd8e0664fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-fp.ll
@@ -6,24 +6,20 @@ declare void @llvm.masked.compressstore.v1f16(<1 x half>, ptr, <1 x i1>)
define void @compressstore_v1f16(ptr %base, <1 x half> %v, <1 x i1> %mask) {
; RV32-LABEL: compressstore_v1f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB0_2
-; RV32-NEXT: # %bb.1: # %cond.store
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: .LBB0_2: # %else
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v1f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB0_2
-; RV64-NEXT: # %bb.1: # %cond.store
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: .LBB0_2: # %else
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v1f16(<1 x half> %v, ptr align 2 %base, <1 x i1> %mask)
ret void
@@ -33,48 +29,20 @@ declare void @llvm.masked.compressstore.v2f16(<2 x half>, ptr, <2 x i1>)
define void @compressstore_v2f16(ptr %base, <2 x half> %v, <2 x i1> %mask) {
; RV32-LABEL: compressstore_v2f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB1_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB1_4
-; RV32-NEXT: .LBB1_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB1_3: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB1_2
-; RV32-NEXT: .LBB1_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vse16.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v2f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB1_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB1_4
-; RV64-NEXT: .LBB1_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB1_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB1_2
-; RV64-NEXT: .LBB1_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse16.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v2f16(<2 x half> %v, ptr align 2 %base, <2 x i1> %mask)
ret void
@@ -84,88 +52,20 @@ declare void @llvm.masked.compressstore.v4f16(<4 x half>, ptr, <4 x i1>)
define void @compressstore_v4f16(ptr %base, <4 x half> %v, <4 x i1> %mask) {
; RV32-LABEL: compressstore_v4f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB2_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB2_6
-; RV32-NEXT: .LBB2_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB2_7
-; RV32-NEXT: .LBB2_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB2_8
-; RV32-NEXT: .LBB2_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB2_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB2_2
-; RV32-NEXT: .LBB2_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB2_3
-; RV32-NEXT: .LBB2_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB2_4
-; RV32-NEXT: .LBB2_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v4f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB2_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB2_6
-; RV64-NEXT: .LBB2_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB2_7
-; RV64-NEXT: .LBB2_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB2_8
-; RV64-NEXT: .LBB2_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB2_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB2_2
-; RV64-NEXT: .LBB2_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB2_3
-; RV64-NEXT: .LBB2_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB2_4
-; RV64-NEXT: .LBB2_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v4f16(<4 x half> %v, ptr align 2 %base, <4 x i1> %mask)
ret void
@@ -175,168 +75,20 @@ declare void @llvm.masked.compressstore.v8f16(<8 x half>, ptr, <8 x i1>)
define void @compressstore_v8f16(ptr %base, <8 x half> %v, <8 x i1> %mask) {
; RV32-LABEL: compressstore_v8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB3_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB3_10
-; RV32-NEXT: .LBB3_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB3_11
-; RV32-NEXT: .LBB3_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB3_12
-; RV32-NEXT: .LBB3_4: # %else8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB3_13
-; RV32-NEXT: .LBB3_5: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB3_14
-; RV32-NEXT: .LBB3_6: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB3_15
-; RV32-NEXT: .LBB3_7: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB3_16
-; RV32-NEXT: .LBB3_8: # %else20
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB3_9: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB3_2
-; RV32-NEXT: .LBB3_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB3_3
-; RV32-NEXT: .LBB3_11: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB3_4
-; RV32-NEXT: .LBB3_12: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: beqz a2, .LBB3_5
-; RV32-NEXT: .LBB3_13: # %cond.store10
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB3_6
-; RV32-NEXT: .LBB3_14: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB3_7
-; RV32-NEXT: .LBB3_15: # %cond.store16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB3_8
-; RV32-NEXT: .LBB3_16: # %cond.store19
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v8f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB3_9
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB3_10
-; RV64-NEXT: .LBB3_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB3_11
-; RV64-NEXT: .LBB3_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB3_12
-; RV64-NEXT: .LBB3_4: # %else8
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB3_13
-; RV64-NEXT: .LBB3_5: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB3_14
-; RV64-NEXT: .LBB3_6: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB3_15
-; RV64-NEXT: .LBB3_7: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB3_16
-; RV64-NEXT: .LBB3_8: # %else20
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB3_9: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB3_2
-; RV64-NEXT: .LBB3_10: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB3_3
-; RV64-NEXT: .LBB3_11: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB3_4
-; RV64-NEXT: .LBB3_12: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 3
+; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: beqz a2, .LBB3_5
-; RV64-NEXT: .LBB3_13: # %cond.store10
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 4
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB3_6
-; RV64-NEXT: .LBB3_14: # %cond.store13
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 5
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB3_7
-; RV64-NEXT: .LBB3_15: # %cond.store16
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 6
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 2
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB3_8
-; RV64-NEXT: .LBB3_16: # %cond.store19
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 7
-; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v8f16(<8 x half> %v, ptr align 2 %base, <8 x i1> %mask)
ret void
@@ -346,24 +98,20 @@ declare void @llvm.masked.compressstore.v1f32(<1 x float>, ptr, <1 x i1>)
define void @compressstore_v1f32(ptr %base, <1 x float> %v, <1 x i1> %mask) {
; RV32-LABEL: compressstore_v1f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB4_2
-; RV32-NEXT: # %bb.1: # %cond.store
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: .LBB4_2: # %else
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v1f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB4_2
-; RV64-NEXT: # %bb.1: # %cond.store
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: .LBB4_2: # %else
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v1f32(<1 x float> %v, ptr align 4 %base, <1 x i1> %mask)
ret void
@@ -373,48 +121,20 @@ declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>)
define void @compressstore_v2f32(ptr %base, <2 x float> %v, <2 x i1> %mask) {
; RV32-LABEL: compressstore_v2f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB5_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB5_4
-; RV32-NEXT: .LBB5_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB5_3: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB5_2
-; RV32-NEXT: .LBB5_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v2f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB5_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB5_4
-; RV64-NEXT: .LBB5_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB5_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB5_2
-; RV64-NEXT: .LBB5_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v2f32(<2 x float> %v, ptr align 4 %base, <2 x i1> %mask)
ret void
@@ -424,88 +144,20 @@ declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>)
define void @compressstore_v4f32(ptr %base, <4 x float> %v, <4 x i1> %mask) {
; RV32-LABEL: compressstore_v4f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB6_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB6_6
-; RV32-NEXT: .LBB6_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB6_7
-; RV32-NEXT: .LBB6_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB6_8
-; RV32-NEXT: .LBB6_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB6_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB6_2
-; RV32-NEXT: .LBB6_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vse32.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB6_3
-; RV32-NEXT: .LBB6_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vse32.v v9, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB6_4
-; RV32-NEXT: .LBB6_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v4f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB6_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB6_6
-; RV64-NEXT: .LBB6_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB6_7
-; RV64-NEXT: .LBB6_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB6_8
-; RV64-NEXT: .LBB6_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB6_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB6_2
-; RV64-NEXT: .LBB6_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vse32.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB6_3
-; RV64-NEXT: .LBB6_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v9, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB6_4
-; RV64-NEXT: .LBB6_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v4f32(<4 x float> %v, ptr align 4 %base, <4 x i1> %mask)
ret void
@@ -515,176 +167,20 @@ declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>)
define void @compressstore_v8f32(ptr %base, <8 x float> %v, <8 x i1> %mask) {
; RV32-LABEL: compressstore_v8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB7_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB7_10
-; RV32-NEXT: .LBB7_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB7_11
-; RV32-NEXT: .LBB7_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB7_12
-; RV32-NEXT: .LBB7_4: # %else8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB7_13
-; RV32-NEXT: .LBB7_5: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB7_14
-; RV32-NEXT: .LBB7_6: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB7_15
-; RV32-NEXT: .LBB7_7: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB7_16
-; RV32-NEXT: .LBB7_8: # %else20
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB7_9: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB7_2
-; RV32-NEXT: .LBB7_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB7_3
-; RV32-NEXT: .LBB7_11: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB7_4
-; RV32-NEXT: .LBB7_12: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 3
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: beqz a2, .LBB7_5
-; RV32-NEXT: .LBB7_13: # %cond.store10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB7_6
-; RV32-NEXT: .LBB7_14: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 5
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB7_7
-; RV32-NEXT: .LBB7_15: # %cond.store16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 6
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 4
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB7_8
-; RV32-NEXT: .LBB7_16: # %cond.store19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v8f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB7_9
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB7_10
-; RV64-NEXT: .LBB7_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB7_11
-; RV64-NEXT: .LBB7_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB7_12
-; RV64-NEXT: .LBB7_4: # %else8
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB7_13
-; RV64-NEXT: .LBB7_5: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB7_14
-; RV64-NEXT: .LBB7_6: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB7_15
-; RV64-NEXT: .LBB7_7: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB7_16
-; RV64-NEXT: .LBB7_8: # %else20
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB7_9: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB7_2
-; RV64-NEXT: .LBB7_10: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB7_3
-; RV64-NEXT: .LBB7_11: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB7_4
-; RV64-NEXT: .LBB7_12: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: beqz a2, .LBB7_5
-; RV64-NEXT: .LBB7_13: # %cond.store10
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB7_6
-; RV64-NEXT: .LBB7_14: # %cond.store13
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 5
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB7_7
-; RV64-NEXT: .LBB7_15: # %cond.store16
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 6
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vse32.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 4
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB7_8
-; RV64-NEXT: .LBB7_16: # %cond.store19
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 7
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v8f32(<8 x float> %v, ptr align 4 %base, <8 x i1> %mask)
ret void
@@ -694,24 +190,20 @@ declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>)
define void @compressstore_v1f64(ptr %base, <1 x double> %v, <1 x i1> %mask) {
; RV32-LABEL: compressstore_v1f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB8_2
-; RV32-NEXT: # %bb.1: # %cond.store
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: .LBB8_2: # %else
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v1f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB8_2
-; RV64-NEXT: # %bb.1: # %cond.store
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: .LBB8_2: # %else
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v1f64(<1 x double> %v, ptr align 8 %base, <1 x i1> %mask)
ret void
@@ -721,48 +213,20 @@ declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>)
define void @compressstore_v2f64(ptr %base, <2 x double> %v, <2 x i1> %mask) {
; RV32-LABEL: compressstore_v2f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB9_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB9_4
-; RV32-NEXT: .LBB9_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB9_3: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB9_2
-; RV32-NEXT: .LBB9_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV32-NEXT: vse64.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v2f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB9_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB9_4
-; RV64-NEXT: .LBB9_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB9_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB9_2
-; RV64-NEXT: .LBB9_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v9, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v2f64(<2 x double> %v, ptr align 8 %base, <2 x i1> %mask)
ret void
@@ -772,92 +236,20 @@ declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>)
define void @compressstore_v4f64(ptr %base, <4 x double> %v, <4 x i1> %mask) {
; RV32-LABEL: compressstore_v4f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB10_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB10_6
-; RV32-NEXT: .LBB10_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB10_7
-; RV32-NEXT: .LBB10_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB10_8
-; RV32-NEXT: .LBB10_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB10_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB10_2
-; RV32-NEXT: .LBB10_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV32-NEXT: vse64.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB10_3
-; RV32-NEXT: .LBB10_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v10, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB10_4
-; RV32-NEXT: .LBB10_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB10_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB10_6
-; RV64-NEXT: .LBB10_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB10_7
-; RV64-NEXT: .LBB10_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB10_8
-; RV64-NEXT: .LBB10_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB10_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB10_2
-; RV64-NEXT: .LBB10_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB10_3
-; RV64-NEXT: .LBB10_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB10_4
-; RV64-NEXT: .LBB10_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
call void @llvm.masked.compressstore.v4f64(<4 x double> %v, ptr align 8 %base, <4 x i1> %mask)
ret void
@@ -867,213 +259,21 @@ declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>)
define void @compressstore_v8f64(ptr %base, <8 x double> %v, <8 x i1> %mask) {
; RV32-LABEL: compressstore_v8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB11_11
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB11_12
-; RV32-NEXT: .LBB11_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB11_13
-; RV32-NEXT: .LBB11_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB11_5
-; RV32-NEXT: .LBB11_4: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v12, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: .LBB11_5: # %else8
-; RV32-NEXT: addi sp, sp, -320
-; RV32-NEXT: .cfi_def_cfa_offset 320
-; RV32-NEXT: sw ra, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 320
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB11_14
-; RV32-NEXT: # %bb.6: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB11_15
-; RV32-NEXT: .LBB11_7: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB11_16
-; RV32-NEXT: .LBB11_8: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB11_10
-; RV32-NEXT: .LBB11_9: # %cond.store19
-; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a1)
-; RV32-NEXT: fld fa5, 56(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: .LBB11_10: # %else20
-; RV32-NEXT: addi sp, s0, -320
-; RV32-NEXT: lw ra, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 320
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB11_11: # %cond.store
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB11_2
-; RV32-NEXT: .LBB11_12: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vse64.v v12, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB11_3
-; RV32-NEXT: .LBB11_13: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vse64.v v12, (a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB11_4
-; RV32-NEXT: j .LBB11_5
-; RV32-NEXT: .LBB11_14: # %cond.store10
-; RV32-NEXT: addi a2, sp, 192
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: fld fa5, 224(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB11_7
-; RV32-NEXT: .LBB11_15: # %cond.store13
-; RV32-NEXT: addi a2, sp, 128
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: fld fa5, 168(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB11_8
-; RV32-NEXT: .LBB11_16: # %cond.store16
-; RV32-NEXT: addi a2, sp, 64
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: fld fa5, 112(sp)
-; RV32-NEXT: fsd fa5, 0(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB11_9
-; RV32-NEXT: j .LBB11_10
+; RV32-NEXT: ret
;
; RV64-LABEL: compressstore_v8f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB11_11
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB11_12
-; RV64-NEXT: .LBB11_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB11_13
-; RV64-NEXT: .LBB11_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB11_5
-; RV64-NEXT: .LBB11_4: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: .LBB11_5: # %else8
-; RV64-NEXT: addi sp, sp, -320
-; RV64-NEXT: .cfi_def_cfa_offset 320
-; RV64-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 320
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB11_14
-; RV64-NEXT: # %bb.6: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB11_15
-; RV64-NEXT: .LBB11_7: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB11_16
-; RV64-NEXT: .LBB11_8: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB11_10
-; RV64-NEXT: .LBB11_9: # %cond.store19
-; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a1)
-; RV64-NEXT: fld fa5, 56(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: .LBB11_10: # %else20
-; RV64-NEXT: addi sp, s0, -320
-; RV64-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 320
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB11_11: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB11_2
-; RV64-NEXT: .LBB11_12: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB11_3
-; RV64-NEXT: .LBB11_13: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB11_4
-; RV64-NEXT: j .LBB11_5
-; RV64-NEXT: .LBB11_14: # %cond.store10
-; RV64-NEXT: addi a2, sp, 192
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: fld fa5, 224(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB11_7
-; RV64-NEXT: .LBB11_15: # %cond.store13
-; RV64-NEXT: addi a2, sp, 128
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: fld fa5, 168(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB11_8
-; RV64-NEXT: .LBB11_16: # %cond.store16
-; RV64-NEXT: addi a2, sp, 64
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: fld fa5, 112(sp)
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB11_9
-; RV64-NEXT: j .LBB11_10
+; RV64-NEXT: ret
call void @llvm.masked.compressstore.v8f64(<8 x double> %v, ptr align 8 %base, <8 x i1> %mask)
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
index eb0096dbfba6de..a388ba92f302bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compressstore-int.ll
@@ -6,13 +6,11 @@ declare void @llvm.masked.compressstore.v1i8(<1 x i8>, ptr, <1 x i1>)
define void @compressstore_v1i8(ptr %base, <1 x i8> %v, <1 x i1> %mask) {
; CHECK-LABEL: compressstore_v1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: bnez a1, .LBB0_2
-; CHECK-NEXT: # %bb.1: # %cond.store
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: .LBB0_2: # %else
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i8(<1 x i8> %v, ptr %base, <1 x i1> %mask)
ret void
@@ -22,25 +20,11 @@ declare void @llvm.masked.compressstore.v2i8(<2 x i8>, ptr, <2 x i1>)
define void @compressstore_v2i8(ptr %base, <2 x i8> %v, <2 x i1> %mask) {
; CHECK-LABEL: compressstore_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB1_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB1_4
-; CHECK-NEXT: .LBB1_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_3: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB1_2
-; CHECK-NEXT: .LBB1_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i8(<2 x i8> %v, ptr %base, <2 x i1> %mask)
ret void
@@ -50,45 +34,11 @@ declare void @llvm.masked.compressstore.v4i8(<4 x i8>, ptr, <4 x i1>)
define void @compressstore_v4i8(ptr %base, <4 x i8> %v, <4 x i1> %mask) {
; CHECK-LABEL: compressstore_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB2_5
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB2_6
-; CHECK-NEXT: .LBB2_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB2_7
-; CHECK-NEXT: .LBB2_3: # %else5
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: bnez a1, .LBB2_8
-; CHECK-NEXT: .LBB2_4: # %else8
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_5: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB2_2
-; CHECK-NEXT: .LBB2_6: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB2_3
-; CHECK-NEXT: .LBB2_7: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: beqz a1, .LBB2_4
-; CHECK-NEXT: .LBB2_8: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i8(<4 x i8> %v, ptr %base, <4 x i1> %mask)
ret void
@@ -98,85 +48,11 @@ declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
define void @compressstore_v8i8(ptr %base, <8 x i8> %v, <8 x i1> %mask) {
; CHECK-LABEL: compressstore_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB3_9
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB3_10
-; CHECK-NEXT: .LBB3_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB3_11
-; CHECK-NEXT: .LBB3_3: # %else5
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: bnez a2, .LBB3_12
-; CHECK-NEXT: .LBB3_4: # %else8
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: bnez a2, .LBB3_13
-; CHECK-NEXT: .LBB3_5: # %else11
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: bnez a2, .LBB3_14
-; CHECK-NEXT: .LBB3_6: # %else14
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: bnez a2, .LBB3_15
-; CHECK-NEXT: .LBB3_7: # %else17
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: bnez a1, .LBB3_16
-; CHECK-NEXT: .LBB3_8: # %else20
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_9: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB3_2
-; CHECK-NEXT: .LBB3_10: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB3_3
-; CHECK-NEXT: .LBB3_11: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: beqz a2, .LBB3_4
-; CHECK-NEXT: .LBB3_12: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 3
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: beqz a2, .LBB3_5
-; CHECK-NEXT: .LBB3_13: # %cond.store10
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: beqz a2, .LBB3_6
-; CHECK-NEXT: .LBB3_14: # %cond.store13
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 5
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: beqz a2, .LBB3_7
-; CHECK-NEXT: .LBB3_15: # %cond.store16
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 6
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: beqz a1, .LBB3_8
-; CHECK-NEXT: .LBB3_16: # %cond.store19
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i8(<8 x i8> %v, ptr %base, <8 x i1> %mask)
ret void
@@ -186,13 +62,11 @@ declare void @llvm.masked.compressstore.v1i16(<1 x i16>, ptr, <1 x i1>)
define void @compressstore_v1i16(ptr %base, <1 x i16> %v, <1 x i1> %mask) {
; CHECK-LABEL: compressstore_v1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: bnez a1, .LBB4_2
-; CHECK-NEXT: # %bb.1: # %cond.store
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: .LBB4_2: # %else
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i16(<1 x i16> %v, ptr align 2 %base, <1 x i1> %mask)
ret void
@@ -202,25 +76,11 @@ declare void @llvm.masked.compressstore.v2i16(<2 x i16>, ptr, <2 x i1>)
define void @compressstore_v2i16(ptr %base, <2 x i16> %v, <2 x i1> %mask) {
; CHECK-LABEL: compressstore_v2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB5_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB5_4
-; CHECK-NEXT: .LBB5_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_3: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB5_2
-; CHECK-NEXT: .LBB5_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i16(<2 x i16> %v, ptr align 2 %base, <2 x i1> %mask)
ret void
@@ -230,45 +90,11 @@ declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
define void @compressstore_v4i16(ptr %base, <4 x i16> %v, <4 x i1> %mask) {
; CHECK-LABEL: compressstore_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB6_5
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB6_6
-; CHECK-NEXT: .LBB6_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB6_7
-; CHECK-NEXT: .LBB6_3: # %else5
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: bnez a1, .LBB6_8
-; CHECK-NEXT: .LBB6_4: # %else8
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB6_5: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB6_2
-; CHECK-NEXT: .LBB6_6: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB6_3
-; CHECK-NEXT: .LBB6_7: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: beqz a1, .LBB6_4
-; CHECK-NEXT: .LBB6_8: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i16(<4 x i16> %v, ptr align 2 %base, <4 x i1> %mask)
ret void
@@ -278,85 +104,11 @@ declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
define void @compressstore_v8i16(ptr %base, <8 x i16> %v, <8 x i1> %mask) {
; CHECK-LABEL: compressstore_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB7_9
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB7_10
-; CHECK-NEXT: .LBB7_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB7_11
-; CHECK-NEXT: .LBB7_3: # %else5
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: bnez a2, .LBB7_12
-; CHECK-NEXT: .LBB7_4: # %else8
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: bnez a2, .LBB7_13
-; CHECK-NEXT: .LBB7_5: # %else11
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: bnez a2, .LBB7_14
-; CHECK-NEXT: .LBB7_6: # %else14
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: bnez a2, .LBB7_15
-; CHECK-NEXT: .LBB7_7: # %else17
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: bnez a1, .LBB7_16
-; CHECK-NEXT: .LBB7_8: # %else20
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB7_9: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB7_2
-; CHECK-NEXT: .LBB7_10: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB7_3
-; CHECK-NEXT: .LBB7_11: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: beqz a2, .LBB7_4
-; CHECK-NEXT: .LBB7_12: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 3
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: beqz a2, .LBB7_5
-; CHECK-NEXT: .LBB7_13: # %cond.store10
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: beqz a2, .LBB7_6
-; CHECK-NEXT: .LBB7_14: # %cond.store13
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 5
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: beqz a2, .LBB7_7
-; CHECK-NEXT: .LBB7_15: # %cond.store16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 6
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 2
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: beqz a1, .LBB7_8
-; CHECK-NEXT: .LBB7_16: # %cond.store19
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i16(<8 x i16> %v, ptr align 2 %base, <8 x i1> %mask)
ret void
@@ -366,13 +118,11 @@ declare void @llvm.masked.compressstore.v1i32(<1 x i32>, ptr, <1 x i1>)
define void @compressstore_v1i32(ptr %base, <1 x i32> %v, <1 x i1> %mask) {
; CHECK-LABEL: compressstore_v1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: bnez a1, .LBB8_2
-; CHECK-NEXT: # %bb.1: # %cond.store
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: .LBB8_2: # %else
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i32(<1 x i32> %v, ptr align 4 %base, <1 x i1> %mask)
ret void
@@ -382,25 +132,11 @@ declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
define void @compressstore_v2i32(ptr %base, <2 x i32> %v, <2 x i1> %mask) {
; CHECK-LABEL: compressstore_v2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB9_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB9_4
-; CHECK-NEXT: .LBB9_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB9_3: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB9_2
-; CHECK-NEXT: .LBB9_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i32(<2 x i32> %v, ptr align 4 %base, <2 x i1> %mask)
ret void
@@ -410,45 +146,11 @@ declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
define void @compressstore_v4i32(ptr %base, <4 x i32> %v, <4 x i1> %mask) {
; CHECK-LABEL: compressstore_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB10_5
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB10_6
-; CHECK-NEXT: .LBB10_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB10_7
-; CHECK-NEXT: .LBB10_3: # %else5
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: bnez a1, .LBB10_8
-; CHECK-NEXT: .LBB10_4: # %else8
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB10_5: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB10_2
-; CHECK-NEXT: .LBB10_6: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB10_3
-; CHECK-NEXT: .LBB10_7: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a1, a1, 8
-; CHECK-NEXT: beqz a1, .LBB10_4
-; CHECK-NEXT: .LBB10_8: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i32(<4 x i32> %v, ptr align 4 %base, <4 x i1> %mask)
ret void
@@ -458,89 +160,11 @@ declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
define void @compressstore_v8i32(ptr %base, <8 x i32> %v, <8 x i1> %mask) {
; CHECK-LABEL: compressstore_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v0
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB11_9
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: bnez a2, .LBB11_10
-; CHECK-NEXT: .LBB11_2: # %else2
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: bnez a2, .LBB11_11
-; CHECK-NEXT: .LBB11_3: # %else5
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: bnez a2, .LBB11_12
-; CHECK-NEXT: .LBB11_4: # %else8
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: bnez a2, .LBB11_13
-; CHECK-NEXT: .LBB11_5: # %else11
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: bnez a2, .LBB11_14
-; CHECK-NEXT: .LBB11_6: # %else14
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: bnez a2, .LBB11_15
-; CHECK-NEXT: .LBB11_7: # %else17
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: bnez a1, .LBB11_16
-; CHECK-NEXT: .LBB11_8: # %else20
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_9: # %cond.store
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 2
-; CHECK-NEXT: beqz a2, .LBB11_2
-; CHECK-NEXT: .LBB11_10: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 1
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 4
-; CHECK-NEXT: beqz a2, .LBB11_3
-; CHECK-NEXT: .LBB11_11: # %cond.store4
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 8
-; CHECK-NEXT: beqz a2, .LBB11_4
-; CHECK-NEXT: .LBB11_12: # %cond.store7
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 3
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 16
-; CHECK-NEXT: beqz a2, .LBB11_5
-; CHECK-NEXT: .LBB11_13: # %cond.store10
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 32
-; CHECK-NEXT: beqz a2, .LBB11_6
-; CHECK-NEXT: .LBB11_14: # %cond.store13
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 5
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a2, a1, 64
-; CHECK-NEXT: beqz a2, .LBB11_7
-; CHECK-NEXT: .LBB11_15: # %cond.store16
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 6
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: andi a1, a1, -128
-; CHECK-NEXT: beqz a1, .LBB11_8
-; CHECK-NEXT: .LBB11_16: # %cond.store19
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i32(<8 x i32> %v, ptr align 4 %base, <8 x i1> %mask)
ret void
@@ -548,439 +172,59 @@ define void @compressstore_v8i32(ptr %base, <8 x i32> %v, <8 x i1> %mask) {
declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
define void @compressstore_v1i64(ptr %base, <1 x i64> %v, <1 x i1> %mask) {
-; RV32-LABEL: compressstore_v1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfirst.m a1, v0
-; RV32-NEXT: bnez a1, .LBB12_2
-; RV32-NEXT: # %bb.1: # %cond.store
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: .LBB12_2: # %else
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfirst.m a1, v0
-; RV64-NEXT: bnez a1, .LBB12_2
-; RV64-NEXT: # %bb.1: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: .LBB12_2: # %else
-; RV64-NEXT: ret
+; CHECK-LABEL: compressstore_v1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v9, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v1i64(<1 x i64> %v, ptr align 8 %base, <1 x i1> %mask)
ret void
}
declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
define void @compressstore_v2i64(ptr %base, <2 x i64> %v, <2 x i1> %mask) {
-; RV32-LABEL: compressstore_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB13_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: bnez a1, .LBB13_4
-; RV32-NEXT: .LBB13_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB13_3: # %cond.store
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v9, v8, a2
-; RV32-NEXT: vmv.x.s a2, v9
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 2
-; RV32-NEXT: beqz a1, .LBB13_2
-; RV32-NEXT: .LBB13_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB13_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: bnez a1, .LBB13_4
-; RV64-NEXT: .LBB13_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB13_3: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 2
-; RV64-NEXT: beqz a1, .LBB13_2
-; RV64-NEXT: .LBB13_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: compressstore_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v9, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v2i64(<2 x i64> %v, ptr align 8 %base, <2 x i1> %mask)
ret void
}
declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
define void @compressstore_v4i64(ptr %base, <4 x i64> %v, <4 x i1> %mask) {
-; RV32-LABEL: compressstore_v4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB14_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB14_6
-; RV32-NEXT: .LBB14_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB14_7
-; RV32-NEXT: .LBB14_3: # %else5
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: bnez a1, .LBB14_8
-; RV32-NEXT: .LBB14_4: # %else8
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB14_5: # %cond.store
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vx v10, v8, a2
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB14_2
-; RV32-NEXT: .LBB14_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v12, v10, a2
-; RV32-NEXT: vmv.x.s a2, v12
-; RV32-NEXT: vmv.x.s a3, v10
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB14_3
-; RV32-NEXT: .LBB14_7: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v12, v10, a2
-; RV32-NEXT: vmv.x.s a2, v12
-; RV32-NEXT: vmv.x.s a3, v10
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, 8
-; RV32-NEXT: beqz a1, .LBB14_4
-; RV32-NEXT: .LBB14_8: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB14_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB14_6
-; RV64-NEXT: .LBB14_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB14_7
-; RV64-NEXT: .LBB14_3: # %else5
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: bnez a1, .LBB14_8
-; RV64-NEXT: .LBB14_4: # %else8
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB14_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB14_2
-; RV64-NEXT: .LBB14_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB14_3
-; RV64-NEXT: .LBB14_7: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, 8
-; RV64-NEXT: beqz a1, .LBB14_4
-; RV64-NEXT: .LBB14_8: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: compressstore_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v4i64(<4 x i64> %v, ptr align 8 %base, <4 x i1> %mask)
ret void
}
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
define void @compressstore_v8i64(ptr %base, <8 x i64> %v, <8 x i1> %mask) {
-; RV32-LABEL: compressstore_v8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: bnez a2, .LBB15_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: bnez a2, .LBB15_10
-; RV32-NEXT: .LBB15_2: # %else2
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: bnez a2, .LBB15_11
-; RV32-NEXT: .LBB15_3: # %else5
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: bnez a2, .LBB15_12
-; RV32-NEXT: .LBB15_4: # %else8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: bnez a2, .LBB15_13
-; RV32-NEXT: .LBB15_5: # %else11
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: bnez a2, .LBB15_14
-; RV32-NEXT: .LBB15_6: # %else14
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: bnez a2, .LBB15_15
-; RV32-NEXT: .LBB15_7: # %else17
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: bnez a1, .LBB15_16
-; RV32-NEXT: .LBB15_8: # %else20
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB15_9: # %cond.store
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vx v12, v8, a2
-; RV32-NEXT: vmv.x.s a2, v12
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 2
-; RV32-NEXT: beqz a2, .LBB15_2
-; RV32-NEXT: .LBB15_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 4
-; RV32-NEXT: beqz a2, .LBB15_3
-; RV32-NEXT: .LBB15_11: # %cond.store4
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 8
-; RV32-NEXT: beqz a2, .LBB15_4
-; RV32-NEXT: .LBB15_12: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 16
-; RV32-NEXT: beqz a2, .LBB15_5
-; RV32-NEXT: .LBB15_13: # %cond.store10
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 4
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 32
-; RV32-NEXT: beqz a2, .LBB15_6
-; RV32-NEXT: .LBB15_14: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 5
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a2, a1, 64
-; RV32-NEXT: beqz a2, .LBB15_7
-; RV32-NEXT: .LBB15_15: # %cond.store16
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 6
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v12, a2
-; RV32-NEXT: vmv.x.s a2, v16
-; RV32-NEXT: vmv.x.s a3, v12
-; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: andi a1, a1, -128
-; RV32-NEXT: beqz a1, .LBB15_8
-; RV32-NEXT: .LBB15_16: # %cond.store19
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: compressstore_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: andi a2, a1, 1
-; RV64-NEXT: bnez a2, .LBB15_11
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: bnez a2, .LBB15_12
-; RV64-NEXT: .LBB15_2: # %else2
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: bnez a2, .LBB15_13
-; RV64-NEXT: .LBB15_3: # %else5
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: beqz a2, .LBB15_5
-; RV64-NEXT: .LBB15_4: # %cond.store7
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 3
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: .LBB15_5: # %else8
-; RV64-NEXT: addi sp, sp, -320
-; RV64-NEXT: .cfi_def_cfa_offset 320
-; RV64-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 320
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: andi a2, a1, 16
-; RV64-NEXT: bnez a2, .LBB15_14
-; RV64-NEXT: # %bb.6: # %else11
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: bnez a2, .LBB15_15
-; RV64-NEXT: .LBB15_7: # %else14
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: bnez a2, .LBB15_16
-; RV64-NEXT: .LBB15_8: # %else17
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: beqz a1, .LBB15_10
-; RV64-NEXT: .LBB15_9: # %cond.store19
-; RV64-NEXT: mv a1, sp
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a1)
-; RV64-NEXT: ld a1, 56(sp)
-; RV64-NEXT: sd a1, 0(a0)
-; RV64-NEXT: .LBB15_10: # %else20
-; RV64-NEXT: addi sp, s0, -320
-; RV64-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 320
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB15_11: # %cond.store
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 2
-; RV64-NEXT: beqz a2, .LBB15_2
-; RV64-NEXT: .LBB15_12: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 1
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 4
-; RV64-NEXT: beqz a2, .LBB15_3
-; RV64-NEXT: .LBB15_13: # %cond.store4
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v12, (a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 8
-; RV64-NEXT: bnez a2, .LBB15_4
-; RV64-NEXT: j .LBB15_5
-; RV64-NEXT: .LBB15_14: # %cond.store10
-; RV64-NEXT: addi a2, sp, 192
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: ld a2, 224(sp)
-; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 32
-; RV64-NEXT: beqz a2, .LBB15_7
-; RV64-NEXT: .LBB15_15: # %cond.store13
-; RV64-NEXT: addi a2, sp, 128
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: ld a2, 168(sp)
-; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a2, a1, 64
-; RV64-NEXT: beqz a2, .LBB15_8
-; RV64-NEXT: .LBB15_16: # %cond.store16
-; RV64-NEXT: addi a2, sp, 64
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: ld a2, 112(sp)
-; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: andi a1, a1, -128
-; RV64-NEXT: bnez a1, .LBB15_9
-; RV64-NEXT: j .LBB15_10
+; CHECK-LABEL: compressstore_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vcompress.vm v12, v8, v0
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v12, (a0)
+; CHECK-NEXT: ret
call void @llvm.masked.compressstore.v8i64(<8 x i64> %v, ptr align 8 %base, <8 x i1> %mask)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
>From f5fb36dbdfdd312431285add97bf00cc44b34ccb Mon Sep 17 00:00:00 2001
From: Kolya Panchenko <kolya.panchenko at sifive.com>
Date: Tue, 12 Mar 2024 12:32:16 -0700
Subject: [PATCH 5/5] Addressed comments
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +
llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 180 +++++++++---------
2 files changed, 94 insertions(+), 90 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index c28f22ff6a6146..8c7691ae0c99ee 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1630,5 +1630,9 @@ bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
TLI->getValueType(DL, VTy->getElementType())))
return false;
+ if (!isLegalMaskedLoadStore(DataTy, Alignment))
+ return false;
+
+ // Splitting of vcompress for LMUL > 8 is not yet implemented.
return getRegUsageForType(VTy) <= 8;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
index d1d4138e539776..e1e16e8ac42528 100644
--- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -4,8 +4,8 @@
; Compress + store for i8 type
-define void @test_compresstore_i8_v1(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v1:
+define void @test_compresstore_v1i8(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
+; RV64-LABEL: test_compresstore_v1i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -14,7 +14,7 @@ define void @test_compresstore_i8_v1(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
; RV64-NEXT: vse8.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v1:
+; RV32-LABEL: test_compresstore_v1i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -27,8 +27,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v2(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v2:
+define void @test_compresstore_v2i8(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
+; RV64-LABEL: test_compresstore_v2i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -37,7 +37,7 @@ define void @test_compresstore_i8_v2(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
; RV64-NEXT: vse8.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v2:
+; RV32-LABEL: test_compresstore_v2i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -50,8 +50,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v4(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v4:
+define void @test_compresstore_v4i8(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
+; RV64-LABEL: test_compresstore_v4i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -60,7 +60,7 @@ define void @test_compresstore_i8_v4(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
; RV64-NEXT: vse8.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v4:
+; RV32-LABEL: test_compresstore_v4i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -73,8 +73,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v8:
+define void @test_compresstore_v8i8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
+; RV64-LABEL: test_compresstore_v8i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -83,7 +83,7 @@ define void @test_compresstore_i8_v8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
; RV64-NEXT: vse8.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v8:
+; RV32-LABEL: test_compresstore_v8i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -96,8 +96,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v16(ptr %p, <16 x i1> %mask, <16 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v16:
+define void @test_compresstore_v16i8(ptr %p, <16 x i1> %mask, <16 x i8> %data) {
+; RV64-LABEL: test_compresstore_v16i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -106,7 +106,7 @@ define void @test_compresstore_i8_v16(ptr %p, <16 x i1> %mask, <16 x i8> %data)
; RV64-NEXT: vse8.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v16:
+; RV32-LABEL: test_compresstore_v16i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -119,8 +119,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v32(ptr %p, <32 x i1> %mask, <32 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v32:
+define void @test_compresstore_v32i8(ptr %p, <32 x i1> %mask, <32 x i8> %data) {
+; RV64-LABEL: test_compresstore_v32i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
@@ -130,7 +130,7 @@ define void @test_compresstore_i8_v32(ptr %p, <32 x i1> %mask, <32 x i8> %data)
; RV64-NEXT: vse8.v v10, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v32:
+; RV32-LABEL: test_compresstore_v32i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
@@ -144,8 +144,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v64(ptr %p, <64 x i1> %mask, <64 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v64:
+define void @test_compresstore_v64i8(ptr %p, <64 x i1> %mask, <64 x i8> %data) {
+; RV64-LABEL: test_compresstore_v64i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a1, 64
; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
@@ -155,7 +155,7 @@ define void @test_compresstore_i8_v64(ptr %p, <64 x i1> %mask, <64 x i8> %data)
; RV64-NEXT: vse8.v v12, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v64:
+; RV32-LABEL: test_compresstore_v64i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a1, 64
; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
@@ -169,8 +169,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v128(ptr %p, <128 x i1> %mask, <128 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v128:
+define void @test_compresstore_v128i8(ptr %p, <128 x i1> %mask, <128 x i8> %data) {
+; RV64-LABEL: test_compresstore_v128i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a1, 128
; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
@@ -180,7 +180,7 @@ define void @test_compresstore_i8_v128(ptr %p, <128 x i1> %mask, <128 x i8> %dat
; RV64-NEXT: vse8.v v16, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i8_v128:
+; RV32-LABEL: test_compresstore_v128i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a1, 128
; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
@@ -194,8 +194,8 @@ entry:
ret void
}
-define void @test_compresstore_i8_v256(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
-; RV64-LABEL: test_compresstore_i8_v256:
+define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
+; RV64-LABEL: test_compresstore_v256i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a2, 128
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
@@ -4392,7 +4392,7 @@ define void @test_compresstore_i8_v256(ptr %p, <256 x i1> %mask, <256 x i8> %dat
; RV64-NEXT: .LBB8_525: # %cond.store760
; RV64-NEXT: j .LBB8_272
;
-; RV32-LABEL: test_compresstore_i8_v256:
+; RV32-LABEL: test_compresstore_v256i8:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a2, 128
; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
@@ -8610,8 +8610,8 @@ entry:
; Compress + store for i16 type
-define void @test_compresstore_i16_v1(ptr %p, <1 x i1> %mask, <1 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v1:
+define void @test_compresstore_v1i16(ptr %p, <1 x i1> %mask, <1 x i16> %data) {
+; RV64-LABEL: test_compresstore_v1i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -8620,7 +8620,7 @@ define void @test_compresstore_i16_v1(ptr %p, <1 x i1> %mask, <1 x i16> %data) {
; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v1:
+; RV32-LABEL: test_compresstore_v1i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -8633,8 +8633,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v2(ptr %p, <2 x i1> %mask, <2 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v2:
+define void @test_compresstore_v2i16(ptr %p, <2 x i1> %mask, <2 x i16> %data) {
+; RV64-LABEL: test_compresstore_v2i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -8643,7 +8643,7 @@ define void @test_compresstore_i16_v2(ptr %p, <2 x i1> %mask, <2 x i16> %data) {
; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v2:
+; RV32-LABEL: test_compresstore_v2i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -8656,8 +8656,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v4(ptr %p, <4 x i1> %mask, <4 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v4:
+define void @test_compresstore_v4i16(ptr %p, <4 x i1> %mask, <4 x i16> %data) {
+; RV64-LABEL: test_compresstore_v4i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -8666,7 +8666,7 @@ define void @test_compresstore_i16_v4(ptr %p, <4 x i1> %mask, <4 x i16> %data) {
; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v4:
+; RV32-LABEL: test_compresstore_v4i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -8679,8 +8679,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v8(ptr %p, <8 x i1> %mask, <8 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v8:
+define void @test_compresstore_v8i16(ptr %p, <8 x i1> %mask, <8 x i16> %data) {
+; RV64-LABEL: test_compresstore_v8i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -8689,7 +8689,7 @@ define void @test_compresstore_i16_v8(ptr %p, <8 x i1> %mask, <8 x i16> %data) {
; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v8:
+; RV32-LABEL: test_compresstore_v8i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -8702,8 +8702,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v16(ptr %p, <16 x i1> %mask, <16 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v16:
+define void @test_compresstore_v16i16(ptr %p, <16 x i1> %mask, <16 x i16> %data) {
+; RV64-LABEL: test_compresstore_v16i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vcompress.vm v10, v8, v0
@@ -8712,7 +8712,7 @@ define void @test_compresstore_i16_v16(ptr %p, <16 x i1> %mask, <16 x i16> %data
; RV64-NEXT: vse16.v v10, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v16:
+; RV32-LABEL: test_compresstore_v16i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vcompress.vm v10, v8, v0
@@ -8725,8 +8725,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v32(ptr %p, <32 x i1> %mask, <32 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v32:
+define void @test_compresstore_v32i16(ptr %p, <32 x i1> %mask, <32 x i16> %data) {
+; RV64-LABEL: test_compresstore_v32i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
@@ -8736,7 +8736,7 @@ define void @test_compresstore_i16_v32(ptr %p, <32 x i1> %mask, <32 x i16> %data
; RV64-NEXT: vse16.v v12, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v32:
+; RV32-LABEL: test_compresstore_v32i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
@@ -8750,8 +8750,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v64(ptr %p, <64 x i1> %mask, <64 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v64:
+define void @test_compresstore_v64i16(ptr %p, <64 x i1> %mask, <64 x i16> %data) {
+; RV64-LABEL: test_compresstore_v64i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a1, 64
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
@@ -8761,7 +8761,7 @@ define void @test_compresstore_i16_v64(ptr %p, <64 x i1> %mask, <64 x i16> %data
; RV64-NEXT: vse16.v v16, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i16_v64:
+; RV32-LABEL: test_compresstore_v64i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a1, 64
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
@@ -8775,8 +8775,8 @@ entry:
ret void
}
-define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %data) {
-; RV64-LABEL: test_compresstore_i16_v128:
+define void @test_compresstore_v128i16(ptr %p, <128 x i1> %mask, <128 x i16> %data) {
+; RV64-LABEL: test_compresstore_v128i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a2, v0
@@ -10575,7 +10575,7 @@ define void @test_compresstore_i16_v128(ptr %p, <128 x i1> %mask, <128 x i16> %d
; RV64-NEXT: .LBB16_257: # %cond.store376
; RV64-NEXT: j .LBB16_136
;
-; RV32-LABEL: test_compresstore_i16_v128:
+; RV32-LABEL: test_compresstore_v128i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a3, v0
@@ -12361,8 +12361,8 @@ entry:
; Compress + store for i32 type
-define void @test_compresstore_i32_v1(ptr %p, <1 x i1> %mask, <1 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v1:
+define void @test_compresstore_v1i32(ptr %p, <1 x i1> %mask, <1 x i32> %data) {
+; RV64-LABEL: test_compresstore_v1i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -12371,7 +12371,7 @@ define void @test_compresstore_i32_v1(ptr %p, <1 x i1> %mask, <1 x i32> %data) {
; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i32_v1:
+; RV32-LABEL: test_compresstore_v1i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -12384,8 +12384,8 @@ entry:
ret void
}
-define void @test_compresstore_i32_v2(ptr %p, <2 x i1> %mask, <2 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v2:
+define void @test_compresstore_v2i32(ptr %p, <2 x i1> %mask, <2 x i32> %data) {
+; RV64-LABEL: test_compresstore_v2i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -12394,7 +12394,7 @@ define void @test_compresstore_i32_v2(ptr %p, <2 x i1> %mask, <2 x i32> %data) {
; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i32_v2:
+; RV32-LABEL: test_compresstore_v2i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -12407,8 +12407,8 @@ entry:
ret void
}
-define void @test_compresstore_i32_v4(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v4:
+define void @test_compresstore_v4i32(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
+; RV64-LABEL: test_compresstore_v4i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -12417,7 +12417,7 @@ define void @test_compresstore_i32_v4(ptr %p, <4 x i1> %mask, <4 x i32> %data) {
; RV64-NEXT: vse32.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i32_v4:
+; RV32-LABEL: test_compresstore_v4i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -12430,8 +12430,8 @@ entry:
ret void
}
-define void @test_compresstore_i32_v8(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v8:
+define void @test_compresstore_v8i32(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
+; RV64-LABEL: test_compresstore_v8i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vcompress.vm v10, v8, v0
@@ -12440,7 +12440,7 @@ define void @test_compresstore_i32_v8(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
; RV64-NEXT: vse32.v v10, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i32_v8:
+; RV32-LABEL: test_compresstore_v8i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vcompress.vm v10, v8, v0
@@ -12453,8 +12453,8 @@ entry:
ret void
}
-define void @test_compresstore_i32_v16(ptr %p, <16 x i1> %mask, <16 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v16:
+define void @test_compresstore_v16i32(ptr %p, <16 x i1> %mask, <16 x i32> %data) {
+; RV64-LABEL: test_compresstore_v16i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vcompress.vm v12, v8, v0
@@ -12463,7 +12463,7 @@ define void @test_compresstore_i32_v16(ptr %p, <16 x i1> %mask, <16 x i32> %data
; RV64-NEXT: vse32.v v12, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i32_v16:
+; RV32-LABEL: test_compresstore_v16i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vcompress.vm v12, v8, v0
@@ -12476,8 +12476,8 @@ entry:
ret void
}
-define void @test_compresstore_i32_v32(ptr %p, <32 x i1> %mask, <32 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v32:
+define void @test_compresstore_v32i32(ptr %p, <32 x i1> %mask, <32 x i32> %data) {
+; RV64-LABEL: test_compresstore_v32i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
@@ -12487,7 +12487,7 @@ define void @test_compresstore_i32_v32(ptr %p, <32 x i1> %mask, <32 x i32> %data
; RV64-NEXT: vse32.v v16, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i32_v32:
+; RV32-LABEL: test_compresstore_v32i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
@@ -12501,8 +12501,8 @@ entry:
ret void
}
-define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data) {
-; RV64-LABEL: test_compresstore_i32_v64:
+define void @test_compresstore_v64i32(ptr %p, <64 x i1> %mask, <64 x i32> %data) {
+; RV64-LABEL: test_compresstore_v64i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a1, v0
@@ -13365,7 +13365,7 @@ define void @test_compresstore_i32_v64(ptr %p, <64 x i1> %mask, <64 x i32> %data
; RV64-NEXT: bltz a1, .LBB23_67
; RV64-NEXT: j .LBB23_68
;
-; RV32-LABEL: test_compresstore_i32_v64:
+; RV32-LABEL: test_compresstore_v64i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a2, v0
@@ -14233,8 +14233,8 @@ entry:
; Compress + store for i64 type
-define void @test_compresstore_i64_v1(ptr %p, <1 x i1> %mask, <1 x i64> %data) {
-; RV64-LABEL: test_compresstore_i64_v1:
+define void @test_compresstore_v1i64(ptr %p, <1 x i1> %mask, <1 x i64> %data) {
+; RV64-LABEL: test_compresstore_v1i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -14243,7 +14243,7 @@ define void @test_compresstore_i64_v1(ptr %p, <1 x i1> %mask, <1 x i64> %data) {
; RV64-NEXT: vse64.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i64_v1:
+; RV32-LABEL: test_compresstore_v1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -14256,8 +14256,8 @@ entry:
ret void
}
-define void @test_compresstore_i64_v2(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
-; RV64-LABEL: test_compresstore_i64_v2:
+define void @test_compresstore_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
+; RV64-LABEL: test_compresstore_v2i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vcompress.vm v9, v8, v0
@@ -14266,7 +14266,7 @@ define void @test_compresstore_i64_v2(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
; RV64-NEXT: vse64.v v9, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i64_v2:
+; RV32-LABEL: test_compresstore_v2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vcompress.vm v9, v8, v0
@@ -14279,8 +14279,8 @@ entry:
ret void
}
-define void @test_compresstore_i64_v4(ptr %p, <4 x i1> %mask, <4 x i64> %data) {
-; RV64-LABEL: test_compresstore_i64_v4:
+define void @test_compresstore_v4i64(ptr %p, <4 x i1> %mask, <4 x i64> %data) {
+; RV64-LABEL: test_compresstore_v4i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vcompress.vm v10, v8, v0
@@ -14289,7 +14289,7 @@ define void @test_compresstore_i64_v4(ptr %p, <4 x i1> %mask, <4 x i64> %data) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i64_v4:
+; RV32-LABEL: test_compresstore_v4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vcompress.vm v10, v8, v0
@@ -14302,8 +14302,8 @@ entry:
ret void
}
-define void @test_compresstore_i64_v8(ptr %p, <8 x i1> %mask, <8 x i64> %data) {
-; RV64-LABEL: test_compresstore_i64_v8:
+define void @test_compresstore_v8i64(ptr %p, <8 x i1> %mask, <8 x i64> %data) {
+; RV64-LABEL: test_compresstore_v8i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vcompress.vm v12, v8, v0
@@ -14312,7 +14312,7 @@ define void @test_compresstore_i64_v8(ptr %p, <8 x i1> %mask, <8 x i64> %data) {
; RV64-NEXT: vse64.v v12, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i64_v8:
+; RV32-LABEL: test_compresstore_v8i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vcompress.vm v12, v8, v0
@@ -14325,8 +14325,8 @@ entry:
ret void
}
-define void @test_compresstore_i64_v16(ptr %p, <16 x i1> %mask, <16 x i64> %data) {
-; RV64-LABEL: test_compresstore_i64_v16:
+define void @test_compresstore_v16i64(ptr %p, <16 x i1> %mask, <16 x i64> %data) {
+; RV64-LABEL: test_compresstore_v16i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vcompress.vm v16, v8, v0
@@ -14335,7 +14335,7 @@ define void @test_compresstore_i64_v16(ptr %p, <16 x i1> %mask, <16 x i64> %data
; RV64-NEXT: vse64.v v16, (a0)
; RV64-NEXT: ret
;
-; RV32-LABEL: test_compresstore_i64_v16:
+; RV32-LABEL: test_compresstore_v16i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vcompress.vm v16, v8, v0
@@ -14348,8 +14348,8 @@ entry:
ret void
}
-define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data) {
-; RV64-LABEL: test_compresstore_i64_v32:
+define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data) {
+; RV64-LABEL: test_compresstore_v32i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.x.s a1, v0
@@ -14747,7 +14747,7 @@ define void @test_compresstore_i64_v32(ptr %p, <32 x i1> %mask, <32 x i64> %data
; RV64-NEXT: bltz a2, .LBB29_32
; RV64-NEXT: j .LBB29_33
;
-; RV32-LABEL: test_compresstore_i64_v32:
+; RV32-LABEL: test_compresstore_v32i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vmv.x.s a1, v0
More information about the llvm-commits
mailing list