[llvm] [RISCV] Support `llvm.masked.compressstore` intrinsic (PR #83457)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 10:19:45 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Kolya Panchenko (nikolaypanchenko)
Changes
---
Patch is 592.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83457.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+14-2)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+7)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+2)
- (added) llvm/test/CodeGen/RISCV/rvv/compressstore.ll (+17545)
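For reference, `llvm.masked.compressstore` packs the elements of a vector whose mask bit is set and stores them contiguously at the given pointer; the added test file below exercises this for a range of element types and vector lengths. A minimal IR usage sketch (the function name and the `<8 x i32>` type are illustrative, not taken from the patch):

```llvm
; Sketch only: store the active lanes of %data back-to-back starting at %p.
declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)

define void @store_active_lanes(ptr %p, <8 x i1> %mask, <8 x i32> %data) {
entry:
  tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %data, ptr %p, <8 x i1> %mask)
  ret void
}
```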
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e647f56416bfa6..9be5138b05df07 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10460,6 +10460,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SDValue BasePtr = MemSD->getBasePtr();
SDValue Val, Mask, VL;
+ bool IsCompressingStore = false;
if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
Val = VPStore->getValue();
Mask = VPStore->getMask();
@@ -10468,9 +10469,11 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
const auto *MStore = cast<MaskedStoreSDNode>(Op);
Val = MStore->getValue();
Mask = MStore->getMask();
+ IsCompressingStore = MStore->isCompressingStore();
}
- bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+ bool IsUnmasked =
+ ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
MVT VT = Val.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
@@ -10480,7 +10483,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VT);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
- if (!IsUnmasked) {
+ if (!IsUnmasked || IsCompressingStore) {
MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
@@ -10489,6 +10492,15 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (IsCompressingStore) {
+ Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+ DAG.getUNDEF(ContainerVT), Val, Mask, VL);
+ VL =
+ DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
+ getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2e4e69fb4f920f..b54474b14b0543 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1609,3 +1609,10 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
+
+bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
+ auto *VTy = dyn_cast<VectorType>(DataTy);
+ if (!VTy || VTy->isScalableTy() || !ST->hasVInstructions())
+ return false;
+ return getRegUsageForType(VTy) <= 8;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index af36e9d5d5e886..6433027cce0e27 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -261,6 +261,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
}
+ bool isLegalMaskedCompressStore(Type *DataTy);
+
bool isVScaleKnownToBeAPowerOfTwo() const {
return TLI->isVScaleKnownToBeAPowerOfTwo();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
new file mode 100644
index 00000000000000..f227f39740a003
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -0,0 +1,17545 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+v,+d %s -o - | FileCheck %s --check-prefix=RV64
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d %s -o - | FileCheck %s --check-prefix=RV32
+
+; Compress + store for i8 type
+
+define void @test_compresstore_i8_v1(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v1:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v1:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v1i8(<1 x i8> %data, ptr %p, <1 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v2(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v2:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v2:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v2i8(<2 x i8> %data, ptr %p, <2 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v4(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v4:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v4:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v4i8(<4 x i8> %data, ptr %p, <4 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v8:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v8:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v8i8(<8 x i8> %data, ptr %p, <8 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v16(ptr %p, <16 x i1> %mask, <16 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vcompress.vm v9, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v9, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vcompress.vm v9, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v9, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %p, <16 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v32(ptr %p, <32 x i1> %mask, <32 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vcompress.vm v10, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vcompress.vm v10, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vse8.v v10, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %p, <32 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v64(ptr %p, <64 x i1> %mask, <64 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vcompress.vm v12, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vse8.v v12, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vcompress.vm v12, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vse8.v v12, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %p, <64 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v128(ptr %p, <128 x i1> %mask, <128 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v128:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vcompress.vm v16, v8, v0
+; RV64-NEXT: vcpop.m a1, v0
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a0)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_compresstore_i8_v128:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: li a1, 128
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vcompress.vm v16, v8, v0
+; RV32-NEXT: vcpop.m a1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT: vse8.v v16, (a0)
+; RV32-NEXT: ret
+entry:
+ tail call void @llvm.masked.compressstore.v128i8(<128 x i8> %data, ptr %p, <128 x i1> %mask)
+ ret void
+}
+
+define void @test_compresstore_i8_v256(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v256:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: vle8.v v24, (a1)
+; RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV64-NEXT: vmv.x.s a2, v0
+; RV64-NEXT: andi a1, a2, 1
+; RV64-NEXT: bnez a1, .LBB8_273
+; RV64-NEXT: # %bb.1: # %else
+; RV64-NEXT: andi a1, a2, 2
+; RV64-NEXT: bnez a1, .LBB8_274
+; RV64-NEXT: .LBB8_2: # %else2
+; RV64-NEXT: andi a1, a2, 4
+; RV64-NEXT: bnez a1, .LBB8_275
+; RV64-NEXT: .LBB8_3: # %else5
+; RV64-NEXT: andi a1, a2, 8
+; RV64-NEXT: bnez a1, .LBB8_276
+; RV64-NEXT: .LBB8_4: # %else8
+; RV64-NEXT: andi a1, a2, 16
+; RV64-NEXT: bnez a1, .LBB8_277
+; RV64-NEXT: .LBB8_5: # %else11
+; RV64-NEXT: andi a1, a2, 32
+; RV64-NEXT: bnez a1, .LBB8_278
+; RV64-NEXT: .LBB8_6: # %else14
+; RV64-NEXT: andi a1, a2, 64
+; RV64-NEXT: bnez a1, .LBB8_279
+; RV64-NEXT: .LBB8_7: # %else17
+; RV64-NEXT: andi a1, a2, 128
+; RV64-NEXT: bnez a1, .LBB8_280
+; RV64-NEXT: .LBB8_8: # %else20
+; RV64-NEXT: andi a1, a2, 256
+; RV64-NEXT: bnez a1, .LBB8_281
+; RV64-NEXT: .LBB8_9: # %else23
+; RV64-NEXT: andi a1, a2, 512
+; RV64-NEXT: bnez a1, .LBB8_282
+; RV64-NEXT: .LBB8_10: # %else26
+; RV64-NEXT: andi a1, a2, 1024
+; RV64-NEXT: bnez a1, .LBB8_283
+; RV64-NEXT: .LBB8_11: # %else29
+; RV64-NEXT: slli a1, a2, 52
+; RV64-NEXT: bltz a1, .LBB8_284
+; RV64-NEXT: .LBB8_12: # %else32
+; RV64-NEXT: slli a1, a2, 51
+; RV64-NEXT: bltz a1, .LBB8_285
+; RV64-NEXT: .LBB8_13: # %else35
+; RV64-NEXT: slli a1, a2, 50
+; RV64-NEXT: bltz a1, .LBB8_286
+; RV64-NEXT: .LBB8_14: # %else38
+; RV64-NEXT: slli a1, a2, 49
+; RV64-NEXT: bltz a1, .LBB8_287
+; RV64-NEXT: .LBB8_15: # %else41
+; RV64-NEXT: slli a1, a2, 48
+; RV64-NEXT: bltz a1, .LBB8_288
+; RV64-NEXT: .LBB8_16: # %else44
+; RV64-NEXT: slli a1, a2, 47
+; RV64-NEXT: bltz a1, .LBB8_289
+; RV64-NEXT: .LBB8_17: # %else47
+; RV64-NEXT: slli a1, a2, 46
+; RV64-NEXT: bltz a1, .LBB8_290
+; RV64-NEXT: .LBB8_18: # %else50
+; RV64-NEXT: slli a1, a2, 45
+; RV64-NEXT: bltz a1, .LBB8_291
+; RV64-NEXT: .LBB8_19: # %else53
+; RV64-NEXT: slli a1, a2, 44
+; RV64-NEXT: bltz a1, .LBB8_292
+; RV64-NEXT: .LBB8_20: # %else56
+; RV64-NEXT: slli a1, a2, 43
+; RV64-NEXT: bltz a1, .LBB8_293
+; RV64-NEXT: .LBB8_21: # %else59
+; RV64-NEXT: slli a1, a2, 42
+; RV64-NEXT: bltz a1, .LBB8_294
+; RV64-NEXT: .LBB8_22: # %else62
+; RV64-NEXT: slli a1, a2, 41
+; RV64-NEXT: bgez a1, .LBB8_23
+; RV64-NEXT: j .LBB8_295
+; RV64-NEXT: .LBB8_23: # %else65
+; RV64-NEXT: slli a1, a2, 40
+; RV64-NEXT: bgez a1, .LBB8_24
+; RV64-NEXT: j .LBB8_296
+; RV64-NEXT: .LBB8_24: # %else68
+; RV64-NEXT: slli a1, a2, 39
+; RV64-NEXT: bgez a1, .LBB8_25
+; RV64-NEXT: j .LBB8_297
+; RV64-NEXT: .LBB8_25: # %else71
+; RV64-NEXT: slli a1, a2, 38
+; RV64-NEXT: bgez a1, .LBB8_26
+; RV64-NEXT: j .LBB8_298
+; RV64-NEXT: .LBB8_26: # %else74
+; RV64-NEXT: slli a1, a2, 37
+; RV64-NEXT: bgez a1, .LBB8_27
+; RV64-NEXT: j .LBB8_299
+; RV64-NEXT: .LBB8_27: # %else77
+; RV64-NEXT: slli a1, a2, 36
+; RV64-NEXT: bgez a1, .LBB8_28
+; RV64-NEXT: j .LBB8_300
+; RV64-NEXT: .LBB8_28: # %else80
+; RV64-NEXT: slli a1, a2, 35
+; RV64-NEXT: bgez a1, .LBB8_29
+; RV64-NEXT: j .LBB8_301
+; RV64-NEXT: .LBB8_29: # %else83
+; RV64-NEXT: slli a1, a2, 34
+; RV64-NEXT: bgez a1, .LBB8_30
+; RV64-NEXT: j .LBB8_302
+; RV64-NEXT: .LBB8_30: # %else86
+; RV64-NEXT: slli a1, a2, 33
+; RV64-NEXT: bgez a1, .LBB8_31
+; RV64-NEXT: j .LBB8_303
+; RV64-NEXT: .LBB8_31: # %else89
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: bgez a1, .LBB8_33
+; RV64-NEXT: .LBB8_32: # %cond.store91
+; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v16, 31
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v10, (a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: .LBB8_33: # %else92
+; RV64-NEXT: addi sp, sp, -2032
+; RV64-NEXT: .cfi_def_cfa_offset 2032
+; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 2032
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: lui a1, 6
+; RV64-NEXT: addiw a1, a1, -1776
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: slli a3, a2, 31
+; RV64-NEXT: lui a1, 6
+; RV64-NEXT: addiw a1, a1, -984
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bgez a3, .LBB8_34
+; RV64-NEXT: j .LBB8_304
+; RV64-NEXT: .LBB8_34: # %else95
+; RV64-NEXT: slli a3, a2, 30
+; RV64-NEXT: bgez a3, .LBB8_35
+; RV64-NEXT: j .LBB8_305
+; RV64-NEXT: .LBB8_35: # %else98
+; RV64-NEXT: slli a3, a2, 29
+; RV64-NEXT: bgez a3, .LBB8_36
+; RV64-NEXT: j .LBB8_306
+; RV64-NEXT: .LBB8_36: # %else101
+; RV64-NEXT: slli a3, a2, 28
+; RV64-NEXT: bgez a3, .LBB8_37
+; RV64-NEXT: j .LBB8_307
+; RV64-NEXT: .LBB8_37: # %else104
+; RV64-NEXT: slli a3, a2, 27
+; RV64-NEXT: bgez a3, .LBB8_38
+; RV64-NEXT: j .LBB8_308
+; RV64-NEXT: .LBB8_38: # %else107
+; RV64-NEXT: slli a3, a2, 26
+; RV64-NEXT: bgez a3, .LBB8_39
+; RV64-NEXT: j .LBB8_309
+; RV64-NEXT: .LBB8_39: # %else110
+; RV64-NEXT: slli a3, a2, 25
+; RV64-NEXT: bgez a3, .LBB8_40
+; RV64-NEXT: j .LBB8_310
+; RV64-NEXT: .LBB8_40: # %else113
+; RV64-NEXT: slli a3, a2, 24
+; RV64-NEXT: bgez a3, .LBB8_41
+; RV64-NEXT: j .LBB8_311
+; RV64-NEXT: .LBB8_41: # %else116
+; RV64-NEXT: slli a3, a2, 23
+; RV64-NEXT: bgez a3, .LBB8_43
+; RV64-NEXT: .LBB8_42: # %cond.store118
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: li a4, 23
+; RV64-NEXT: slli a4, a4, 10
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: .LBB8_43: # %else119
+; RV64-NEXT: slli a3, a2, 22
+; RV64-NEXT: lui a1, 5
+; RV64-NEXT: addiw a1, a1, 953
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: bgez a3, .LBB8_44
+; RV64-NEXT: j .LBB8_312
+; RV64-NEXT: .LBB8_44: # %else122
+; RV64-NEXT: slli a3, a2, 21
+; RV64-NEXT: bgez a3, .LBB8_45
+; RV64-NEXT: j .LBB8_313
+; RV64-NEXT: .LBB8_45: # %else125
+; RV64-NEXT: slli a3, a2, 20
+; RV64-NEXT: bgez a3, .LBB8_46
+; RV64-NEXT: j .LBB8_314
+; RV64-NEXT: .LBB8_46: # %else128
+; RV64-NEXT: slli a3, a2, 19
+; RV64-NEXT: bgez a3, .LBB8_47
+; RV64-NEXT: j .LBB8_315
+; RV64-NEXT: .LBB8_47: # %else131
+; RV64-NEXT: slli a3, a2, 18
+; RV64-NEXT: bgez a3, .LBB8_48
+; RV64-NEXT: j .LBB8_316
+; RV64-NEXT: .LBB8_48: # %else134
+; RV64-NEXT: slli a3, a2, 17
+; RV64-NEXT: bgez a3, .LBB8_49
+; RV64-NEXT: j .LBB8_317
+; RV64-NEXT: .LBB8_49: # %else137
+; RV64-NEXT: slli a3, a2, 16
+; RV64-NEXT: bgez a3, .LBB8_50
+; RV64-NEXT: j .LBB8_318
+; RV64-NEXT: .LBB8_50: # %else140
+; RV64-NEXT: slli a3, a2, 15
+; RV64-NEXT: bgez a3, .LBB8_51
+; RV64-NEXT: j .LBB8_319
+; RV64-NEXT: .LBB8_51: # %else143
+; RV64-NEXT: slli a3, a2, 14
+; RV64-NEXT: bgez a3, .LBB8_52
+; RV64-NEXT: j .LBB8_320
+; RV64-NEXT: .LBB8_52: # %else146
+; RV64-NEXT: slli a3, a2, 13
+; RV64-NEXT: bgez a3, .LBB8_53
+; RV64-NEXT: j .LBB8_321
+; RV64-NEXT: .LBB8_53: # %else149
+; RV64-NEXT: slli a3, a2, 12
+; RV64-NEXT: bgez a3, .LBB8_54
+; RV64-NEXT: j .LBB8_322
+; RV64-NEXT: .LBB8_54: # %else152
+; RV64-NEXT: slli a3, a2, 11
+; RV64-NEXT: bgez a3, .LBB8_55
+; RV64-NEXT: j .LBB8_323
+; RV64-NEXT: .LBB8_55: # %else155
+; RV64-NEXT: slli a3, a2, 10
+; RV64-NEXT: bgez a3, .LBB8_56
+; RV64-NEXT: j .LBB8_324
+; RV64-NEXT: .LBB8_56: # %else158
+; RV64-NEXT: slli a3, a2, 9
+; RV64-NEXT: bgez a3, .LBB8_57
+; RV64-NEXT: j .LBB8_325
+; RV64-NEXT: .LBB8_57: # %else161
+; RV64-NEXT: slli a3, a2, 8
+; RV64-NEXT: bgez a3, .LBB8_58
+; RV64-NEXT: j .LBB8_326
+; RV64-NEXT: .LBB8_58: # %else164
+; RV64-NEXT: slli a3, a2, 7
+; RV64-NEXT: bgez a3, .LBB8_59
+; RV64-NEXT: j .LBB8_327
+; RV64-NEXT: .LBB8_59: # %else167
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: bgez a3, .LBB8_61
+; RV64-NEXT: .LBB8_60: # %cond.store169
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 896
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: addi a3, a0, 1
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB8_61: # %else170
+; RV64-NEXT: slli a1, a2, 5
+; RV64-NEXT: lui a3, 5
+; RV64-NEXT: addiw a3, a3, -1206
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: bgez a1, .LBB8_62
+; RV64-NEXT: j .LBB8_328
+; RV64-NEXT: .LBB8_62: # %else173
+; RV64-NEXT: slli a1, a2, 4
+; RV64-NEXT: bgez a1, .LBB8_63
+; RV64-NEXT: j .LBB8_329
+; RV64-NEXT: .LBB8_63: # %else176
+; RV64-NEXT: slli a1, a2, 3
+; RV64-NEXT: bgez a1, .LBB8_64
+; RV64-NEXT: j .LBB8_330
+; RV64-NEXT: .LBB8_64: # %else179
+; RV64-NEXT: slli a1, a2, 2
+; RV64-NEXT: bgez a1, .LBB8_66
+; RV64-NEXT: .LBB8_65: # %cond.store181
+; RV64-NEXT: li a1, 128
+; RV64-NEXT: lui a4, 5
+; RV64-NEXT: addiw a4, a4, 384
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT: vse8.v v16, (a4)
+; RV64-NEXT: lbu a...
[truncated]
``````````
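In summary, when `RISCVTTIImpl::isLegalMaskedCompressStore` reports the operation as legal (a fixed-length vector occupying at most eight vector registers, with V instructions available), the masked store is lowered as the checks above show: `vcompress.vm` packs the data under the mask, `vcpop.m` counts the active lanes, and a unit-stride `vse` writes that many elements. A rough sketch of how a target-independent client could gate on this TTI hook (the helper name and surrounding code are illustrative, not part of the patch):

```cpp
// Sketch only: deciding whether to keep an @llvm.masked.compressstore call
// intact for the backend rather than expanding it. The helper is hypothetical;
// TTI::isLegalMaskedCompressStore is the hook this patch implements for RISC-V.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

static bool keepCompressStoreForBackend(const IntrinsicInst &II,
                                        const TargetTransformInfo &TTI) {
  assert(II.getIntrinsicID() == Intrinsic::masked_compressstore &&
         "expected @llvm.masked.compressstore");
  // Operand 0 is the data vector being compressed and stored.
  Type *DataTy = II.getArgOperand(0)->getType();
  // With this patch, the RISC-V implementation returns true for fixed-length
  // vectors that fit in at most eight vector registers.
  return TTI.isLegalMaskedCompressStore(DataTy);
}
```

Scalable vectors and types wider than eight registers are rejected by the new hook, so they stay on the existing expansion path.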
https://github.com/llvm/llvm-project/pull/83457
More information about the llvm-commits mailing list