[llvm] [RISCV] Fold (vslide1up undef, v, (extract_elt x, 0)) into (vslideup x, v, 1) (PR #154847)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 11:05:17 PDT 2025
- Previous message: [llvm] [RISCV] Fold (vslide1up undef, v, (extract_elt x, 0)) into (vslideup x, v, 1) (PR #154847)
- Next message: [llvm] [RISCV] Fold (vslide1up undef, v, (extract_elt x, 0)) into (vslideup x, v, 1) (PR #154847)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/154847
From d06b1eb559198f103b4ce73ae116bdb6e6ef5cea Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Thu, 21 Aug 2025 13:54:30 -0700
Subject: [PATCH 1/4] [RISCV] Fold (vslide1up undef, v, (extract_elt x, 0))
into (vslideup x, v, 1)
Co-authored-by: Craig Topper <craig.topper at sifive.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 31 ++++++++
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 26 +++----
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 78 +++++++------------
.../CodeGen/RISCV/rvv/redundant-vfmvsf.ll | 8 +-
4 files changed, 75 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3d07702c6f78b..6f63a6bf4f3df 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21151,6 +21151,37 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+ case RISCVISD::VSLIDE1UP_VL:
+ case RISCVISD::VFSLIDE1UP_VL: {
+ using namespace SDPatternMatch;
+ SDValue SrcVec;
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ // If the scalar we're sliding in was extracted from the first element of a
+ // vector, we can use that vector as the passthru in a normal slideup of 1.
+ // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
+ if (N->getOperand(0).isUndef() &&
+ sd_match(
+ N->getOperand(2),
+ m_OneUse(m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+ m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))) {
+ MVT SrcVecVT = SrcVec.getSimpleValueType();
+ // Adapt the value type of source vector.
+ if (SrcVecVT.isFixedLengthVector()) {
+ SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+ SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
+ }
+ if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+ SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+ else
+ SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+ return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+ DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+ N->getOperand(4));
+ }
+ break;
+ }
}
return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index aa3b9abe3a7aa..8af784261c2f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1837,14 +1837,11 @@ define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1,
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v9, v10, v16
; CHECK-NEXT: vfredusum.vs v10, v12, v16
-; CHECK-NEXT: vfmv.f.s fa5, v8
-; CHECK-NEXT: vfmv.f.s fa4, v9
-; CHECK-NEXT: vfmv.f.s fa3, v10
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfredusum.vs v11, v14, v16
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v11, 1
+; CHECK-NEXT: vslideup.vi v9, v10, 1
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
@@ -1975,14 +1972,11 @@ define <4 x float> @buildvec_vfredosum_slideup(float %start, <8 x float> %arg1,
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfredosum.vs v9, v10, v16
; CHECK-NEXT: vfredosum.vs v10, v12, v16
-; CHECK-NEXT: vfmv.f.s fa5, v8
-; CHECK-NEXT: vfmv.f.s fa4, v9
-; CHECK-NEXT: vfmv.f.s fa3, v10
-; CHECK-NEXT: vfredosum.vs v8, v14, v16
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfredosum.vs v11, v14, v16
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v11, 1
+; CHECK-NEXT: vslideup.vi v9, v10, 1
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index eedf19c38766b..aaa0269ef1c63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3424,14 +3424,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vredsum.vs v9, v10, v16
; RV32-NEXT: vredsum.vs v10, v12, v16
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vredsum.vs v8, v14, v16
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v9, v8, a2
-; RV32-NEXT: vslide1up.vx v10, v9, a1
-; RV32-NEXT: vslide1up.vx v8, v10, a0
+; RV32-NEXT: vredsum.vs v11, v14, v16
+; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:
@@ -3441,14 +3438,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT: vmv.x.s a0, v8
-; RV64V-ONLY-NEXT: vmv.x.s a1, v9
-; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16
-; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredsum_slideup:
@@ -3498,14 +3492,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT: vmv.x.s a0, v8
-; RV64ZVE32-NEXT: vmv.x.s a1, v9
-; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16
-; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3525,14 +3516,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vredmaxu.vs v9, v10, v10
; RV32-NEXT: vredmaxu.vs v10, v12, v12
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vredmaxu.vs v8, v14, v14
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v9, v8, a2
-; RV32-NEXT: vslide1up.vx v10, v9, a1
-; RV32-NEXT: vslide1up.vx v8, v10, a0
+; RV32-NEXT: vredmaxu.vs v11, v14, v14
+; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:
@@ -3541,14 +3529,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT: vmv.x.s a0, v8
-; RV64V-ONLY-NEXT: vmv.x.s a1, v9
-; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14
-; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredmax_slideup:
@@ -3595,14 +3580,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT: vmv.x.s a0, v8
-; RV64ZVE32-NEXT: vmv.x.s a1, v9
-; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14
-; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
index 821d4240827fb..12ac29aa3a39d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -9,11 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x
; CHECK-NEXT: vfredusum.vs v9, v12, v8
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfredusum.vs v9, v16, v8
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfslide1up.vf v8, v9, fa5
+; CHECK-NEXT: vfredusum.vs v8, v16, v8
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 1
+; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%s0 = extractelement <2 x float> %arg0, i64 0
%r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)
From 2a339252b6ebc68cdecd4f8acccf90bcc397f47b Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 2 Sep 2025 10:20:11 -0700
Subject: [PATCH 2/4] fixup! Update tests
---
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 59 +++++++++----------
1 file changed, 28 insertions(+), 31 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 8af784261c2f7..248ec1369076b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1858,18 +1858,17 @@ define <8 x float> @buildvec_vfredusum_slideup_leading_undef(float %start, <8 x
; CHECK-LABEL: buildvec_vfredusum_slideup_leading_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vfredusum.vs v9, v8, v16
-; CHECK-NEXT: vfredusum.vs v10, v10, v16
-; CHECK-NEXT: vfredusum.vs v11, v12, v16
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa4
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa5
-; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vfmv.s.f v17, fa0
+; CHECK-NEXT: vfredusum.vs v16, v8, v17
+; CHECK-NEXT: vfredusum.vs v8, v10, v17
+; CHECK-NEXT: vfredusum.vs v10, v12, v17
+; CHECK-NEXT: vfredusum.vs v12, v14, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v12, 1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vslideup.vi v16, v8, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: ret
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%253 = insertelement <8 x float> poison, float %252, i64 4
@@ -1887,16 +1886,14 @@ define <8 x float> @buildvec_vfredusum_slideup_trailing_undef(float %start, <8 x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vfredusum.vs v9, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v10, v10, v16
-; CHECK-NEXT: vfredusum.vs v11, v12, v16
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v12, v10, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v12, fa5
+; CHECK-NEXT: vfredusum.vs v12, v12, v16
+; CHECK-NEXT: vfredusum.vs v14, v14, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v12, v14, 1
+; CHECK-NEXT: vslideup.vi v10, v12, 1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: ret
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%253 = insertelement <8 x float> poison, float %252, i64 0
@@ -1941,17 +1938,17 @@ define <8 x float> @buildvec_vfredusum_slideup_mid_undef(float %start, <8 x floa
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vfredusum.vs v9, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v10, v10, v16
-; CHECK-NEXT: vfredusum.vs v11, v12, v16
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfredusum.vs v12, v12, v16
+; CHECK-NEXT: vfredusum.vs v14, v14, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v12, v14, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v14, v12, 4
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v14, 1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: ret
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%253 = insertelement <8 x float> poison, float %252, i64 0
From 1842e03530e09e081ad3c3ad48aa46cfc17a35ee Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 2 Sep 2025 10:31:50 -0700
Subject: [PATCH 3/4] fixup! Remove the one-use condition
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6f63a6bf4f3df..8bd0d886c4b3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21161,10 +21161,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// vector, we can use that vector as the passthru in a normal slideup of 1.
// This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
if (N->getOperand(0).isUndef() &&
- sd_match(
- N->getOperand(2),
- m_OneUse(m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
- m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))) {
+ sd_match(N->getOperand(2),
+ m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+ m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec))))) {
MVT SrcVecVT = SrcVec.getSimpleValueType();
// Adapt the value type of source vector.
if (SrcVecVT.isFixedLengthVector()) {
From 7ea3c5d2ff54b6fce5b0d6a9083c440eb18a62d2 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 3 Sep 2025 11:04:45 -0700
Subject: [PATCH 4/4] fixup! Address review comments
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 38 +++++++++++----------
1 file changed, 20 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8bd0d886c4b3c..f1f3af093e848 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21160,26 +21160,28 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// If the scalar we're sliding in was extracted from the first element of a
// vector, we can use that vector as the passthru in a normal slideup of 1.
// This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
- if (N->getOperand(0).isUndef() &&
- sd_match(N->getOperand(2),
- m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
- m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec))))) {
- MVT SrcVecVT = SrcVec.getSimpleValueType();
- // Adapt the value type of source vector.
- if (SrcVecVT.isFixedLengthVector()) {
- SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
- SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
- }
- if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
- SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
- else
- SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+ if (!N->getOperand(0).isUndef() ||
+ !sd_match(N->getOperand(2),
+ m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+ m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
+ break;
- return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
- DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
- N->getOperand(4));
+ MVT SrcVecVT = SrcVec.getSimpleValueType();
+ if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
+ break;
+ // Adapt the value type of source vector.
+ if (SrcVecVT.isFixedLengthVector()) {
+ SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+ SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
}
- break;
+ if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+ SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+ else
+ SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+ return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+ DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+ N->getOperand(4));
}
}
- Previous message: [llvm] [RISCV] Fold (vslide1up undef, v, (extract_elt x, 0)) into (vslideup x, v, 1) (PR #154847)
- Next message: [llvm] [RISCV] Fold (vslide1up undef, v, (extract_elt x, 0)) into (vslideup x, v, 1) (PR #154847)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the llvm-commits
mailing list