[llvm] [RISCV] Don't increase vslide or splat vl if +minimize-vl is present (PR #147089)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 09:57:42 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
Changes:
Stacked on #146746
If the subtarget's latency depends on vl, then we shouldn't try to fold away vsetvli toggles when doing so would mean increasing vl.
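For example, in `test_vp_splice_v2i64` from the updated tests below (assembly copied from the test diff; `a0` holds `evla` and `a1` holds `evlb`), the trade-off looks like this:

```
# Without +minimize-vl: a single vsetvli, but the vslidedown shares the
# vslideup's vl (a1 = evlb), which may be larger than it needs.
vsetvli       zero, a1, e64, m1, ta, ma
vslidedown.vi v8, v8, 5
addi          a0, a0, -5
vslideup.vx   v8, v9, a0

# With +minimize-vl: an extra vsetvli toggle, but the vslidedown keeps the
# smaller vl it actually needs (a0 = evla - 5).
addi          a0, a0, -5
vsetvli       zero, a0, e64, m1, ta, ma
vslidedown.vi v8, v8, 5
vsetvli       zero, a1, e64, m1, ta, ma
vslideup.vx   v8, v9, a0
```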
---
Patch is 371.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147089.diff
10 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVFeatures.td (+4)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4-4)
- (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+4-3)
- (modified) llvm/lib/Target/RISCV/RISCVProcessors.td (+2-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll (+403-201)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-splice.ll (+3086-1515)
- (modified) llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll (+458-226)
- (modified) llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll (+802-396)
- (modified) llvm/test/CodeGen/RISCV/rvv/vp-splice.ll (+447-218)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll (+112-2)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 6e103dd7d8c44..d6cb1aa95ad5b 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1695,6 +1695,10 @@ foreach nf = {2-8} in
"true", "vlseg"#nf#"eN.v and vsseg"#nf#"eN.v are "
"implemented as a wide memory op and shuffle">;
+def TuneMinimizeVL
+ : SubtargetFeature<"minimize-vl", "MinimizeVL", "true",
+ "Prefer reducing vl even it requires more vsetvli instructions">;
+
def Experimental
: SubtargetFeature<"experimental", "HasExperimental",
"true", "Experimental intrinsics">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 326dd7149ef96..1ba8aba13f8d3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12329,9 +12329,9 @@ SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
- SDValue SlideDown =
- getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
- DownOffset, TrueMask, UpOffset);
+ SDValue SlideDown = getVSlidedown(
+ DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
+ Subtarget.minimizeVL() ? UpOffset : DAG.getRegister(RISCV::X0, XLenVT));
return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
RISCVVType::TAIL_AGNOSTIC);
@@ -13355,7 +13355,7 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
if (ImmValue != 0)
Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
- UpOffset);
+ Subtarget.minimizeVL() ? UpOffset : EVL2);
SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 78d64ea67324f..88461e8461038 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -421,10 +421,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// * We can't modify SEW here since the slide amount is in units of SEW.
// * VL=1 is special only because we have existing support for zero vs
// non-zero VL. We could generalize this if we had a VL > C predicate.
- // * The LMUL1 restriction is for machines whose latency may depend on VL.
+ // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
// * As above, this is only legal for tail "undefined" not "agnostic".
+ // * We avoid increasing vl if the subtarget has +minimize-vl
if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
- VLOp.getImm() == 1 && hasUndefinedPassthru(MI)) {
+ VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && !ST->minimizeVL()) {
Res.VLAny = false;
Res.VLZeroness = true;
Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
@@ -438,7 +439,7 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// careful to not increase the number of active vector registers (unlike for
// vmv.s.x.)
if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
- VLOp.getImm() == 1 && hasUndefinedPassthru(MI)) {
+ VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && !ST->minimizeVL()) {
Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Res.SEWLMULRatio = false;
Res.VLAny = false;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 57b415dc713ac..f4f31e25bbde7 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -274,7 +274,8 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
defvar SiFiveIntelligenceTuneFeatures = !listconcat(SiFive7TuneFeatures,
[TuneDLenFactor2,
TuneOptimizedZeroStrideLoad,
- TuneOptimizedNF2SegmentLoadStore]);
+ TuneOptimizedNF2SegmentLoadStore,
+ TuneMinimizeVL]);
def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
[Feature64Bit,
FeatureStdExtI,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
index 8160e62a43106..ba9f950390a52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
@@ -1,32 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
-; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
-; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOMINVL,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOMINVL,ZVFHMIN
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+minimize-vl -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,MINVL,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+minimize-vl -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,MINVL,ZVFHMIN
define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v2i64:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v2i64:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <2 x i64> %v
}
define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v2i64_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e64, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v2i64_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v2i64_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e64, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <2 x i64> %v
@@ -44,260 +64,419 @@ define <2 x i64> @test_vp_splice_v2i64_zero_offset(<2 x i64> %va, <2 x i64> %vb,
}
define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v2i64_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v2i64_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v2i64_masked:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; MINVL-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; MINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; MINVL-NEXT: ret
%v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
ret <2 x i64> %v
}
define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v4i32:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v4i32:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <4 x i32> %v
}
define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v4i32_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v4i32_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v4i32_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e32, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <4 x i32> %v
}
define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v4i32_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v4i32_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v4i32_masked:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; MINVL-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; MINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; MINVL-NEXT: ret
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
ret <4 x i32> %v
}
define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v8i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v8i16:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v8i16:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <8 x i16> %v
}
define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v8i16_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v8i16_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v8i16_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e16, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <8 x i16> %v
}
define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v8i16_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v8i16_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v8i16_masked:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; MINVL-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; MINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; MINVL-NEXT: ret
%v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
ret <8 x i16> %v
}
define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v16i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v16i8:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v16i8:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <16 x i8> %v
}
define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v16i8_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v16i8_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v16i8_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e8, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <16 x i8> %v
}
define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v16i8_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v16i8_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/147089
More information about the llvm-commits mailing list