[llvm] [RISCV] Use slideup to lower build_vector when its last operand is an extraction (PR #154450)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 13:41:39 PDT 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/154450
>From f74a607fb7a6d3a632cf40a0abcac3078f9cc73b Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 19 Aug 2025 17:24:30 -0700
Subject: [PATCH 1/4] Pre-commit tests
---
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 111 +++++++++
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 211 ++++++++++++++++++
2 files changed, 322 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 3c3e08d387faa..d4d4ed682f6cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1828,3 +1828,114 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}
+
+define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x double> poison, double %e0, i64 0
+ %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+ %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+ %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+ %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+ %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+ %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+ %e7 = extractelement <4 x double> %v, i64 0
+ %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+ ret <8 x double> %v7
+}
+
+; Negative test for slideup lowering where the extract_element is not the build_vector's last operand.
+define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup_not_last_element:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x double> poison, double %e0, i64 0
+ %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+ %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+ %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+ %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+ %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+ %e6 = extractelement <4 x double> %v, i64 0
+ %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+ %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+ ret <8 x double> %v7
+}
+
+define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v16, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v16
+; CHECK-NEXT: vfredusum.vs v9, v10, v16
+; CHECK-NEXT: vfredusum.vs v10, v12, v16
+; CHECK-NEXT: vfredusum.vs v11, v14, v16
+; CHECK-NEXT: vfmv.f.s fa5, v9
+; CHECK-NEXT: vfmv.f.s fa4, v10
+; CHECK-NEXT: vfmv.f.s fa3, v11
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vrgather.vi v9, v8, 0
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: ret
+ %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+ %248 = insertelement <4 x float> poison, float %247, i64 0
+ %250 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+ %251 = insertelement <4 x float> %248, float %250, i64 1
+ %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+ %253 = insertelement <4 x float> %251, float %252, i64 2
+ %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+ %255 = insertelement <4 x float> %253, float %254, i64 3
+ ret <4 x float> %255
+}
+
+define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredosum:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v16, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v16
+; CHECK-NEXT: vfredosum.vs v9, v10, v16
+; CHECK-NEXT: vfredosum.vs v10, v12, v16
+; CHECK-NEXT: vfredosum.vs v11, v14, v16
+; CHECK-NEXT: vfmv.f.s fa5, v9
+; CHECK-NEXT: vfmv.f.s fa4, v10
+; CHECK-NEXT: vfmv.f.s fa3, v11
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vrgather.vi v9, v8, 0
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: ret
+ %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+ %248 = insertelement <4 x float> poison, float %247, i64 0
+ %250 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+ %251 = insertelement <4 x float> %248, float %250, i64 1
+ %252 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+ %253 = insertelement <4 x float> %251, float %252, i64 2
+ %254 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+ %255 = insertelement <4 x float> %253, float %254, i64 3
+ ret <4 x float> %255
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index d9bb007a10f71..d9122676e805f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3416,5 +3416,216 @@ define <4 x i1> @buildvec_i1_splat(i1 %e1) {
ret <4 x i1> %v4
}
+define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredsum:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vredsum.vs v8, v8, v16
+; RV32-NEXT: vredsum.vs v9, v10, v16
+; RV32-NEXT: vredsum.vs v10, v12, v16
+; RV32-NEXT: vredsum.vs v11, v14, v16
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a2, v10
+; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_vredsum:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-ONLY-NEXT: vmv.s.x v16, zero
+; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
+; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
+; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
+; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT: vmv.x.s a0, v8
+; RV64V-ONLY-NEXT: vmv.x.s a1, v9
+; RV64V-ONLY-NEXT: vmv.x.s a2, v10
+; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-ONLY-NEXT: vmv.v.x v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_vredsum:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT: vmv.s.x v16, zero
+; RVA22U64-NEXT: vredsum.vs v8, v8, v16
+; RVA22U64-NEXT: vredsum.vs v9, v10, v16
+; RVA22U64-NEXT: vredsum.vs v10, v12, v16
+; RVA22U64-NEXT: vredsum.vs v11, v14, v16
+; RVA22U64-NEXT: vmv.x.s a0, v8
+; RVA22U64-NEXT: vmv.x.s a1, v9
+; RVA22U64-NEXT: vmv.x.s a2, v10
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a0, a0, a1
+; RVA22U64-NEXT: vmv.x.s a1, v11
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a1, a2, a1
+; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.v.x v8, a0
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: ret
+;
+; RVA22U64-PACK-LABEL: buildvec_vredsum:
+; RVA22U64-PACK: # %bb.0:
+; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT: vmv.s.x v16, zero
+; RVA22U64-PACK-NEXT: vredsum.vs v8, v8, v16
+; RVA22U64-PACK-NEXT: vredsum.vs v9, v10, v16
+; RVA22U64-PACK-NEXT: vredsum.vs v10, v12, v16
+; RVA22U64-PACK-NEXT: vredsum.vs v11, v14, v16
+; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v9
+; RVA22U64-PACK-NEXT: vmv.x.s a2, v10
+; RVA22U64-PACK-NEXT: pack a0, a0, a1
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v11
+; RVA22U64-PACK-NEXT: pack a1, a2, a1
+; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
+; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-PACK-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_vredsum:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v16, zero
+; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
+; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
+; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
+; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT: vmv.x.s a0, v8
+; RV64ZVE32-NEXT: vmv.x.s a1, v9
+; RV64ZVE32-NEXT: vmv.x.s a2, v10
+; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: ret
+ %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
+ %248 = insertelement <4 x i32> poison, i32 %247, i64 0
+ %250 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
+ %251 = insertelement <4 x i32> %248, i32 %250, i64 1
+ %252 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
+ %253 = insertelement <4 x i32> %251, i32 %252, i64 2
+ %254 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
+ %255 = insertelement <4 x i32> %253, i32 %254, i64 3
+ ret <4 x i32> %255
+}
+
+define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredmax:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vredmaxu.vs v8, v8, v8
+; RV32-NEXT: vredmaxu.vs v9, v10, v10
+; RV32-NEXT: vredmaxu.vs v10, v12, v12
+; RV32-NEXT: vredmaxu.vs v11, v14, v14
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a2, v10
+; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_vredmax:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
+; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
+; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
+; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT: vmv.x.s a0, v8
+; RV64V-ONLY-NEXT: vmv.x.s a1, v9
+; RV64V-ONLY-NEXT: vmv.x.s a2, v10
+; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-ONLY-NEXT: vmv.v.x v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_vredmax:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT: vredmaxu.vs v8, v8, v8
+; RVA22U64-NEXT: vredmaxu.vs v9, v10, v10
+; RVA22U64-NEXT: vredmaxu.vs v10, v12, v12
+; RVA22U64-NEXT: vredmaxu.vs v11, v14, v14
+; RVA22U64-NEXT: vmv.x.s a0, v8
+; RVA22U64-NEXT: vmv.x.s a1, v9
+; RVA22U64-NEXT: vmv.x.s a2, v10
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a0, a0, a1
+; RVA22U64-NEXT: vmv.x.s a1, v11
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a1, a2, a1
+; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.v.x v8, a0
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: ret
+;
+; RVA22U64-PACK-LABEL: buildvec_vredmax:
+; RVA22U64-PACK: # %bb.0:
+; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT: vredmaxu.vs v8, v8, v8
+; RVA22U64-PACK-NEXT: vredmaxu.vs v9, v10, v10
+; RVA22U64-PACK-NEXT: vredmaxu.vs v10, v12, v12
+; RVA22U64-PACK-NEXT: vredmaxu.vs v11, v14, v14
+; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v9
+; RVA22U64-PACK-NEXT: vmv.x.s a2, v10
+; RVA22U64-PACK-NEXT: pack a0, a0, a1
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v11
+; RVA22U64-PACK-NEXT: pack a1, a2, a1
+; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
+; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-PACK-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_vredmax:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
+; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
+; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
+; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT: vmv.x.s a0, v8
+; RV64ZVE32-NEXT: vmv.x.s a1, v9
+; RV64ZVE32-NEXT: vmv.x.s a2, v10
+; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: ret
+ %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
+ %248 = insertelement <4 x i32> poison, i32 %247, i64 0
+ %250 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg1)
+ %251 = insertelement <4 x i32> %248, i32 %250, i64 1
+ %252 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg2)
+ %253 = insertelement <4 x i32> %251, i32 %252, i64 2
+ %254 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg3)
+ %255 = insertelement <4 x i32> %253, i32 %254, i64 3
+ ret <4 x i32> %255
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV64: {{.*}}
>From c5b56c25754c5fd684ca69aa63cf603b7b235339 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 19 Aug 2025 13:34:36 -0700
Subject: [PATCH 2/4] [RISCV] Use slideup when the last build_vector operand is
a reduction
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 85 ++++++++++++++++---
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 30 +++----
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 60 ++++++-------
.../CodeGen/RISCV/rvv/redundant-vfmvsf.ll | 7 +-
4 files changed, 114 insertions(+), 68 deletions(-)
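Notes (below the "---" cut line, not part of the commit message): a minimal IR sketch of the shape this patch targets. The function and value names below are made up for illustration; the point is that the last build_vector lane is element 0 of a vector reduction, so the lowering can keep the reduction result in a vector register and vslide1up the remaining scalars in front of it instead of splatting the first lane and sliding down. The buildvec_vredsum and buildvec_vfredusum tests from patch 1 exercise larger variants of the same shape.
define <2 x i32> @sketch_reduce_tail(<8 x i32> %x, i32 %a) {
  ; %r feeds the last lane and is element 0 of the reduction result, so the
  ; build_vector can start from that vector and slide %a up in front of it.
  %r = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %x)
  %v0 = insertelement <2 x i32> poison, i32 %a, i64 0
  %v1 = insertelement <2 x i32> %v0, i32 %r, i64 1
  ret <2 x i32> %v1
}
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)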
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a1db80076530..ce6fc8425856a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4512,33 +4512,94 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
"Illegal type which will result in reserved encoding");
const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
+ auto getVSlide = [&](bool SlideUp, EVT ContainerVT, SDValue Passthru,
+ SDValue Vec, SDValue Offset, SDValue Mask,
+ SDValue VL) -> SDValue {
+ if (SlideUp)
+ return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+ Mask, VL, Policy);
+ return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+ Mask, VL, Policy);
+ };
+
+ // General case: splat the first operand and slide the other operands down one
+ // by one to form a vector. Alternatively, if the last operand is an
+ // extraction from a reduction result, we can use the original vector
+ // reduction result as the start value and slide up instead of slide down.
+ // This way we can avoid the splat.
+ SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
+ SDValue Reduce;
+ bool SlideUp = false;
+ // Find the first non-undef operand from the tail.
+ auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
+ [](SDValue V) { return !V.isUndef(); });
+ if (ItLastNonUndef != Operands.rend()) {
+ using namespace SDPatternMatch;
+ // Check if the last non-undef operand was extracted from a reduction.
+ for (unsigned Opc :
+ {RISCVISD::VECREDUCE_ADD_VL, RISCVISD::VECREDUCE_UMAX_VL,
+ RISCVISD::VECREDUCE_SMAX_VL, RISCVISD::VECREDUCE_UMIN_VL,
+ RISCVISD::VECREDUCE_SMIN_VL, RISCVISD::VECREDUCE_AND_VL,
+ RISCVISD::VECREDUCE_OR_VL, RISCVISD::VECREDUCE_XOR_VL,
+ RISCVISD::VECREDUCE_FADD_VL, RISCVISD::VECREDUCE_SEQ_FADD_VL,
+ RISCVISD::VECREDUCE_FMAX_VL, RISCVISD::VECREDUCE_FMIN_VL}) {
+ SlideUp = sd_match(
+ *ItLastNonUndef,
+ m_ExtractElt(m_AllOf(m_Opc(Opc), m_Value(Reduce)), m_Zero()));
+ if (SlideUp)
+ break;
+ }
+ }
+
+ if (SlideUp) {
+ // Adapt Reduce's type into ContainerVT.
+ if (Reduce.getValueType().getVectorMinNumElements() <
+ ContainerVT.getVectorMinNumElements())
+ Reduce = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Reduce, 0);
+ else
+ Reduce = DAG.getExtractSubvector(DL, ContainerVT, Reduce, 0);
+
+ // Reverse the elements as we're going to slide up from the last element.
+ for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i)
+ std::swap(Operands[i], Operands[N - 1 - i]);
+ }
SDValue Vec;
UndefCount = 0;
- for (SDValue V : Op->ops()) {
+ for (SDValue V : Operands) {
if (V.isUndef()) {
UndefCount++;
continue;
}
- // Start our sequence with a TA splat in the hopes that hardware is able to
- // recognize there's no dependency on the prior value of our temporary
- // register.
+ // Start our sequence with either a TA splat or a reduction result in the
+ // hopes that hardware is able to recognize there's no dependency on the
+ // prior value of our temporary register.
if (!Vec) {
- Vec = DAG.getSplatVector(VT, DL, V);
- Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ if (SlideUp) {
+ Vec = Reduce;
+ } else {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
UndefCount = 0;
continue;
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ Offset, Mask, VL);
UndefCount = 0;
}
- auto OpCode =
- VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+
+ unsigned OpCode;
+ if (VT.isFloatingPoint())
+ OpCode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+ else
+ OpCode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
+
if (!VT.isFloatingPoint())
V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
@@ -4546,8 +4607,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ Offset, Mask, VL);
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index d4d4ed682f6cf..972e565ba8657 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1890,15 +1890,14 @@ define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x flo
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v9, v10, v16
; CHECK-NEXT: vfredusum.vs v10, v12, v16
-; CHECK-NEXT: vfredusum.vs v11, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: vfmv.f.s fa4, v9
+; CHECK-NEXT: vfmv.f.s fa3, v10
+; CHECK-NEXT: vfredusum.vs v8, v14, v16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vrgather.vi v9, v8, 0
-; CHECK-NEXT: vfslide1down.vf v8, v9, fa5
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
+; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
+; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: ret
%247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
@@ -1919,15 +1918,14 @@ define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x flo
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfredosum.vs v9, v10, v16
; CHECK-NEXT: vfredosum.vs v10, v12, v16
-; CHECK-NEXT: vfredosum.vs v11, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: vfmv.f.s fa4, v9
+; CHECK-NEXT: vfmv.f.s fa3, v10
+; CHECK-NEXT: vfredosum.vs v8, v14, v16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vrgather.vi v9, v8, 0
-; CHECK-NEXT: vfslide1down.vf v8, v9, fa5
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
+; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
+; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: ret
%247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index d9122676e805f..a02117fdd2833 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3424,16 +3424,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vredsum.vs v9, v10, v16
; RV32-NEXT: vredsum.vs v10, v12, v16
-; RV32-NEXT: vredsum.vs v11, v14, v16
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vredsum.vs v8, v14, v16
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1up.vx v9, v8, a2
+; RV32-NEXT: vslide1up.vx v10, v9, a1
+; RV32-NEXT: vslide1up.vx v8, v10, a0
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredsum:
@@ -3443,16 +3441,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: vmv.x.s a1, v9
; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16
; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vmv.v.x v8, a0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
+; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
+; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredsum:
@@ -3502,16 +3498,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: vmv.x.s a1, v9
; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16
; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vmv.v.x v8, a0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
+; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
+; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3531,16 +3525,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vredmaxu.vs v9, v10, v10
; RV32-NEXT: vredmaxu.vs v10, v12, v12
-; RV32-NEXT: vredmaxu.vs v11, v14, v14
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vredmaxu.vs v8, v14, v14
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1up.vx v9, v8, a2
+; RV32-NEXT: vslide1up.vx v10, v9, a1
+; RV32-NEXT: vslide1up.vx v8, v10, a0
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredmax:
@@ -3549,16 +3541,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: vmv.x.s a1, v9
; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14
; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vmv.v.x v8, a0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
+; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
+; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredmax:
@@ -3605,16 +3595,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: vmv.x.s a1, v9
; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14
; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vmv.v.x v8, a0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
+; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
+; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
index da912bf401ec0..821d4240827fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -9,12 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x
; CHECK-NEXT: vfredusum.vs v9, v12, v8
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v16, v8
-; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: vfredusum.vs v9, v16, v8
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vrgather.vi v8, v9, 0
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa5
; CHECK-NEXT: ret
%s0 = extractelement <2 x float> %arg0, i64 0
%r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)
>From 217402a2664de789b43ad1709e4b9785cd418fd0 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 20 Aug 2025 10:52:49 -0700
Subject: [PATCH 3/4] fixup! Generalize this into non-reduction operations as
well
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 38 +++++++++----------
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 16 ++++----
2 files changed, 24 insertions(+), 30 deletions(-)
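Notes (not part of the commit message): after this fixup the source of the last lane no longer has to be a reduction; any extract of element 0 of a vector qualifies. A minimal sketch with illustrative names only, mirroring the buildvec_slideup test from patch 1:
define <4 x i32> @sketch_extract_tail(<4 x i32> %v, i32 %a, i32 %b, i32 %c) {
  ; The last lane is element 0 of %v, so %v can serve as the start value and
  ; %c, %b, %a are slid up in front of it with vslide1up.
  %e = extractelement <4 x i32> %v, i64 0
  %t0 = insertelement <4 x i32> poison, i32 %a, i64 0
  %t1 = insertelement <4 x i32> %t0, i32 %b, i64 1
  %t2 = insertelement <4 x i32> %t1, i32 %c, i64 2
  %t3 = insertelement <4 x i32> %t2, i32 %e, i64 3
  ret <4 x i32> %t3
}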
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ce6fc8425856a..d7ce0dcbb4295 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4524,40 +4524,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 // General case: splat the first operand and slide the other operands down one
// by one to form a vector. Alternatively, if the last operand is an
- // extraction from a reduction result, we can use the original vector
+ // extraction from element 0 of a vector, we can use the original vector
// reduction result as the start value and slide up instead of slide down.
 // This way we can avoid the splat.
SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
- SDValue Reduce;
+ SDValue EVec;
bool SlideUp = false;
 // Find the first non-undef operand from the tail.
auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
[](SDValue V) { return !V.isUndef(); });
if (ItLastNonUndef != Operands.rend()) {
using namespace SDPatternMatch;
- // Check if the last non-undef operand was extracted from a reduction.
- for (unsigned Opc :
- {RISCVISD::VECREDUCE_ADD_VL, RISCVISD::VECREDUCE_UMAX_VL,
- RISCVISD::VECREDUCE_SMAX_VL, RISCVISD::VECREDUCE_UMIN_VL,
- RISCVISD::VECREDUCE_SMIN_VL, RISCVISD::VECREDUCE_AND_VL,
- RISCVISD::VECREDUCE_OR_VL, RISCVISD::VECREDUCE_XOR_VL,
- RISCVISD::VECREDUCE_FADD_VL, RISCVISD::VECREDUCE_SEQ_FADD_VL,
- RISCVISD::VECREDUCE_FMAX_VL, RISCVISD::VECREDUCE_FMIN_VL}) {
- SlideUp = sd_match(
- *ItLastNonUndef,
- m_ExtractElt(m_AllOf(m_Opc(Opc), m_Value(Reduce)), m_Zero()));
- if (SlideUp)
- break;
- }
+ // Check if the last non-undef operand was an extraction.
+ SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero()));
}
if (SlideUp) {
- // Adapt Reduce's type into ContainerVT.
- if (Reduce.getValueType().getVectorMinNumElements() <
+ MVT EVecContainerVT = EVec.getSimpleValueType();
+ // Make sure the original vector has a scalable vector type.
+ if (EVecContainerVT.isFixedLengthVector()) {
+ EVecContainerVT =
+ getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
+ EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
+ }
+
+ // Adapt EVec's type into ContainerVT.
+ if (EVecContainerVT.getVectorMinNumElements() <
ContainerVT.getVectorMinNumElements())
- Reduce = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Reduce, 0);
+ EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
else
- Reduce = DAG.getExtractSubvector(DL, ContainerVT, Reduce, 0);
+ EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
// Reverse the elements as we're going to slide up from the last element.
for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i)
@@ -4577,7 +4573,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// prior value of our temporary register.
if (!Vec) {
if (SlideUp) {
- Vec = Reduce;
+ Vec = EVec;
} else {
Vec = DAG.getSplatVector(VT, DL, V);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 972e565ba8657..face756283c33 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1833,15 +1833,13 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d
; CHECK-LABEL: buildvec_slideup:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT: vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT: vfslide1up.vf v10, v8, fa6
+; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
+; CHECK-NEXT: vfslide1up.vf v8, v10, fa3
+; CHECK-NEXT: vfslide1up.vf v10, v8, fa2
+; CHECK-NEXT: vfslide1up.vf v12, v10, fa1
+; CHECK-NEXT: vfslide1up.vf v8, v12, fa0
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
>From 3dec8ff38e69fa1400f30f54b58cc2aa92aade6f Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 20 Aug 2025 13:41:14 -0700
Subject: [PATCH 4/4] fixup! Use std::reverse
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d7ce0dcbb4295..f358c3e77e1db 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4556,8 +4556,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
// Reverse the elements as we're going to slide up from the last element.
- for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i)
- std::swap(Operands[i], Operands[N - 1 - i]);
+ std::reverse(Operands.begin(), Operands.end());
}
SDValue Vec;