[llvm] [RISCV] Rewrite deinterleave load as vlse optimization as DAG combine (PR #150049)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 22 09:09:01 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
This reworks an existing optimization on the fixed vector (shuffle based) deinterleave lowering into a DAG combine. This has the effect of making it kick in much more widely - in particular on the deinterleave intrinsic (i.e. scalable) path and the deinterleaveN (without load) lowering, but also on the intrinsic lowering paths.
As posted, this is a POC/WIP, as the impact here appears wider than I really expected, and my SDAG knowledge is failing me on a couple of details (called out explicitly in code comments addressed to reviewers).
The big question I have is whether the impact here is "too big". I don't have much context on the intrinsic API usage, and am unclear whether applying this rewrite there would be considered acceptable.
(There are also a couple of tests which clearly need to be rewritten to preserve test intent, but I'll do that once the general direction is agreed on.)
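For context, here is a minimal sketch of the pattern this targets, reconstructed from the fixed-vectors-deinterleave-load.ll diff below (the function name is illustrative): when only one field of a factor-N deinterleaved load is actually used, the vlsegN segment load can be replaced by a single strided load of that field.

```llvm
; Only field 2 of the factor-3 deinterleave is used.
define <8 x i8> @one_field_of_factor3(ptr %p) {
  %vec = load <24 x i8>, ptr %p
  %d = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
  %f2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d, 2
  ret <8 x i8> %f2
}
; With this combine, instead of vlseg3e8.v the above selects roughly:
;   addi     a0, a0, 2               # offset of field 2 (2 * 1-byte element)
;   li       a1, 3                   # stride = factor * element size = 3 bytes
;   vsetivli zero, 8, e8, mf2, ta, ma
;   vlse8.v  v8, (a0), a1
```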
---
Patch is 210.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150049.diff
10 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+48)
- (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (-22)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+3-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll (+18-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/pr141907.ll (+10-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll (+11-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (+7-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll (+390-130)
- (modified) llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll (+408-136)
- (modified) llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll (+6-2)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4845a9c84e01f..13169adf20e90 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20760,6 +20760,54 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case RISCVISD::TUPLE_EXTRACT: {
+ EVT VT = N->getValueType(0);
+ SDValue Tuple = N->getOperand(0);
+ unsigned Idx = N->getConstantOperandVal(1);
+ if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ break;
+
+ unsigned NF = 0;
+ switch (Tuple.getConstantOperandVal(1)) {
+ default: break;
+ case Intrinsic::riscv_vlseg2_mask: NF = 2; break;
+ case Intrinsic::riscv_vlseg3_mask: NF = 3; break;
+ case Intrinsic::riscv_vlseg4_mask: NF = 4; break;
+ case Intrinsic::riscv_vlseg5_mask: NF = 5; break;
+ case Intrinsic::riscv_vlseg6_mask: NF = 6; break;
+ case Intrinsic::riscv_vlseg7_mask: NF = 7; break;
+ case Intrinsic::riscv_vlseg8_mask: NF = 8; break;
+ }
+ if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
+ break;
+
+ // @REVIEWERS - What's the right value to use for the mem size here?
+ unsigned SEW = VT.getScalarSizeInBits();
+ if (Log2_64(SEW) != Tuple.getConstantOperandVal(7))
+ break;
+ unsigned Stride = SEW/8 * NF;
+ SDValue Offset = DAG.getConstant(SEW/8 * Idx, DL, XLenVT);
+
+ SDValue Ops[] = {
+ /*Chain=*/Tuple.getOperand(0),
+ /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
+ /*Passthru=*/Tuple.getOperand(2),
+ /*Ptr=*/DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3), Offset),
+ /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
+ /*Mask=*/Tuple.getOperand(4),
+ /*VL=*/Tuple.getOperand(5),
+ /*Policy=*/Tuple.getOperand(6)
+ };
+
+ SDVTList VTs = DAG.getVTList({VT, MVT::Other});
+ // @REVIEWERS - What's the right MemVT and MMO to use here?
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ cast<MemIntrinsicSDNode>(Tuple)->getMemoryVT(),
+ cast<MemIntrinsicSDNode>(Tuple)->getMemOperand());
+ SDValue Chain = Result.getValue(1);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
}
return SDValue();
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 25817b6d2707f..d1f0184e3cf8a 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -216,28 +216,6 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
return false;
- // If the segment load is going to be performed segment at a time anyways
- // and there's only one element used, use a strided load instead. This
- // will be equally fast, and create less vector register pressure.
- if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
- unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
- Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
- Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
- Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
- // Note: Same VL as above, but i32 not xlen due to signature of
- // vp.strided.load
- VL = Builder.CreateElementCount(Builder.getInt32Ty(),
- VTy->getElementCount());
- CallInst *CI =
- Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
- {VTy, BasePtr->getType(), Stride->getType()},
- {BasePtr, Stride, Mask, VL});
- CI->addParamAttr(0,
- Attribute::getWithAlignment(CI->getContext(), Alignment));
- Shuffles[0]->replaceAllUsesWith(CI);
- return true;
- };
-
CallInst *VlsegN = Builder.CreateIntrinsic(
FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 807651c9b40c6..0ec7a7e9f7f38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -260,8 +260,10 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p
define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor3:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 2
+; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <24 x i8>, ptr %p
%d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
index 4eed3df0d3f16..46c15fbb67319 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
@@ -16,8 +16,10 @@ define <8 x i8> @load_factor2(ptr %ptr) {
define <8 x i8> @load_factor3(ptr %ptr) {
; CHECK-LABEL: load_factor3:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 2
+; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -29,8 +31,10 @@ define <8 x i8> @load_factor3(ptr %ptr) {
define <8 x i8> @load_factor4(ptr %ptr) {
; CHECK-LABEL: load_factor4:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 3
+; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v5, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -43,8 +47,10 @@ define <8 x i8> @load_factor4(ptr %ptr) {
define <8 x i8> @load_factor5(ptr %ptr) {
; CHECK-LABEL: load_factor5:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 4
+; CHECK-NEXT: li a1, 5
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v4, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -58,8 +64,10 @@ define <8 x i8> @load_factor5(ptr %ptr) {
define <8 x i8> @load_factor6(ptr %ptr) {
; CHECK-LABEL: load_factor6:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 5
+; CHECK-NEXT: li a1, 6
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg6e8.v v3, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -74,8 +82,10 @@ define <8 x i8> @load_factor6(ptr %ptr) {
define <8 x i8> @load_factor7(ptr %ptr) {
; CHECK-LABEL: load_factor7:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 6
+; CHECK-NEXT: li a1, 7
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg7e8.v v2, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -91,8 +101,10 @@ define <8 x i8> @load_factor7(ptr %ptr) {
define <8 x i8> @load_factor8(ptr %ptr) {
; CHECK-LABEL: load_factor8:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 7
+; CHECK-NEXT: li a1, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg8e8.v v1, (a0)
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
index 648b47dc440c3..f93f88a5bc06c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -9,27 +9,29 @@ define void @pr141907(ptr %0) nounwind {
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: addi a3, sp, 20
+; CHECK-NEXT: li a4, 12
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vs4r.v v8, (a2)
; CHECK-NEXT: vsetvli a1, a1, e8, mf8, ta, ma
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v11, v9, 0, v0.t
-; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v8, (a2)
+; CHECK-NEXT: vnsrl.wi v9, v8, 0, v0.t
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
+; CHECK-NEXT: vlse32.v v8, (a3), a4
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
-; CHECK-NEXT: vsseg2e16.v v11, (zero)
+; CHECK-NEXT: vsseg2e16.v v9, (zero)
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: .LBB0_2: # %while.body5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: j .LBB0_2
entry:
br label %vector.body
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index f9f0aa67a9034..688a44324c09a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -371,8 +371,10 @@ define {<vscale x 2 x ptr>, <vscale x 2 x ptr>} @vector_deinterleave_load_nxv2p0
define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: addi a0, a0, 2
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 24 x i8>, ptr %p
%d0 = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave3(<vscale x 24 x i8> %vec)
@@ -407,8 +409,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -419,8 +422,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v5, (a0)
+; CHECK-NEXT: addi a0, a0, 3
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 0a96e4ff7fba7..a84608b55f0fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -3712,8 +3712,9 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive(<vsca
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
-; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v8, (a0)
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
@@ -3732,9 +3733,11 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: addi a1, sp, 36
; CHECK-NEXT: vs4r.v v8, (a0)
-; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v3, (a0)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vlse32.v v8, (a1), a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
index e6a98c90037d6..071176ca946a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
@@ -182,8 +182,10 @@ entry:
define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 3
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -209,8 +211,10 @@ entry:
define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 3
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -236,8 +240,10 @@ entry:
define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 3
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -263,8 +269,10 @@ entry:
define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 3
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -290,8 +298,10 @@ entry:
define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 3
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -317,8 +327,10 @@ entry:
define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 4
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -344,8 +356,10 @@ entry:
define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 4
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -371,8 +385,10 @@ entry:
define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: li a2, 4
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/150049