[llvm] [RISCV] Use slideup to lower build_vector when all operands are (extract_element X, 0) (PR #154450)

Min-Yih Hsu via llvm-commits llvm-commits@lists.llvm.org
Tue Sep 2 09:44:35 PDT 2025


https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/154450

From f74a607fb7a6d3a632cf40a0abcac3078f9cc73b Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Tue, 19 Aug 2025 17:24:30 -0700
Subject: [PATCH 1/9] Pre-commit tests

---
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 111 +++++++++
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll   | 211 ++++++++++++++++++
 2 files changed, 322 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 3c3e08d387faa..d4d4ed682f6cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1828,3 +1828,114 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
   %v7 = insertelement <8 x double> %v6, double %e7, i64 7
   ret <8 x double> %v7
 }
+
+define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+  %e7 = extractelement <4 x double> %v, i64 0
+  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+  ret <8 x double> %v7
+}
+
+; Negative test for slideup lowering where the extract_element was not build_vector's last operand.
+define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup_not_last_element:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %e6 = extractelement <4 x double> %v, i64 0
+  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+  ret <8 x double> %v7
+}
+
+define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v16
+; CHECK-NEXT:    vfredusum.vs v9, v10, v16
+; CHECK-NEXT:    vfredusum.vs v10, v12, v16
+; CHECK-NEXT:    vfredusum.vs v11, v14, v16
+; CHECK-NEXT:    vfmv.f.s fa5, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v10
+; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vrgather.vi v9, v8, 0
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    ret
+  %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %248 = insertelement <4 x float> poison, float %247, i64 0
+  %250 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %251 = insertelement <4 x float> %248, float %250, i64 1
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %253 = insertelement <4 x float> %251, float %252, i64 2
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %255 = insertelement <4 x float> %253, float %254, i64 3
+  ret <4 x float> %255
+}
+
+define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredosum:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v16
+; CHECK-NEXT:    vfredosum.vs v9, v10, v16
+; CHECK-NEXT:    vfredosum.vs v10, v12, v16
+; CHECK-NEXT:    vfredosum.vs v11, v14, v16
+; CHECK-NEXT:    vfmv.f.s fa5, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v10
+; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vrgather.vi v9, v8, 0
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    ret
+  %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %248 = insertelement <4 x float> poison, float %247, i64 0
+  %250 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %251 = insertelement <4 x float> %248, float %250, i64 1
+  %252 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %253 = insertelement <4 x float> %251, float %252, i64 2
+  %254 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %255 = insertelement <4 x float> %253, float %254, i64 3
+  ret <4 x float> %255
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index d9bb007a10f71..d9122676e805f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3416,5 +3416,216 @@ define <4 x i1> @buildvec_i1_splat(i1 %e1) {
   ret <4 x i1> %v4
 }
 
+define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredsum:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vmv.s.x v16, zero
+; RV32-NEXT:    vredsum.vs v8, v8, v16
+; RV32-NEXT:    vredsum.vs v9, v10, v16
+; RV32-NEXT:    vredsum.vs v10, v12, v16
+; RV32-NEXT:    vredsum.vs v11, v14, v16
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vmv.x.s a1, v9
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    vmv.x.s a3, v11
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    ret
+;
+; RV64V-ONLY-LABEL: buildvec_vredsum:
+; RV64V-ONLY:       # %bb.0:
+; RV64V-ONLY-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-ONLY-NEXT:    vmv.s.x v16, zero
+; RV64V-ONLY-NEXT:    vredsum.vs v8, v8, v16
+; RV64V-ONLY-NEXT:    vredsum.vs v9, v10, v16
+; RV64V-ONLY-NEXT:    vredsum.vs v10, v12, v16
+; RV64V-ONLY-NEXT:    vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
+; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
+; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
+; RV64V-ONLY-NEXT:    vmv.x.s a3, v11
+; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-ONLY-NEXT:    vmv.v.x v8, a0
+; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT:    ret
+;
+; RVA22U64-LABEL: buildvec_vredsum:
+; RVA22U64:       # %bb.0:
+; RVA22U64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT:    vmv.s.x v16, zero
+; RVA22U64-NEXT:    vredsum.vs v8, v8, v16
+; RVA22U64-NEXT:    vredsum.vs v9, v10, v16
+; RVA22U64-NEXT:    vredsum.vs v10, v12, v16
+; RVA22U64-NEXT:    vredsum.vs v11, v14, v16
+; RVA22U64-NEXT:    vmv.x.s a0, v8
+; RVA22U64-NEXT:    vmv.x.s a1, v9
+; RVA22U64-NEXT:    vmv.x.s a2, v10
+; RVA22U64-NEXT:    slli a1, a1, 32
+; RVA22U64-NEXT:    add.uw a0, a0, a1
+; RVA22U64-NEXT:    vmv.x.s a1, v11
+; RVA22U64-NEXT:    slli a1, a1, 32
+; RVA22U64-NEXT:    add.uw a1, a2, a1
+; RVA22U64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-NEXT:    vmv.v.x v8, a0
+; RVA22U64-NEXT:    vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT:    ret
+;
+; RVA22U64-PACK-LABEL: buildvec_vredsum:
+; RVA22U64-PACK:       # %bb.0:
+; RVA22U64-PACK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT:    vmv.s.x v16, zero
+; RVA22U64-PACK-NEXT:    vredsum.vs v8, v8, v16
+; RVA22U64-PACK-NEXT:    vredsum.vs v9, v10, v16
+; RVA22U64-PACK-NEXT:    vredsum.vs v10, v12, v16
+; RVA22U64-PACK-NEXT:    vredsum.vs v11, v14, v16
+; RVA22U64-PACK-NEXT:    vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT:    vmv.x.s a1, v9
+; RVA22U64-PACK-NEXT:    vmv.x.s a2, v10
+; RVA22U64-PACK-NEXT:    pack a0, a0, a1
+; RVA22U64-PACK-NEXT:    vmv.x.s a1, v11
+; RVA22U64-PACK-NEXT:    pack a1, a2, a1
+; RVA22U64-PACK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-PACK-NEXT:    vmv.v.x v8, a0
+; RVA22U64-PACK-NEXT:    vslide1down.vx v8, v8, a1
+; RVA22U64-PACK-NEXT:    ret
+;
+; RV64ZVE32-LABEL: buildvec_vredsum:
+; RV64ZVE32:       # %bb.0:
+; RV64ZVE32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32-NEXT:    vmv.s.x v16, zero
+; RV64ZVE32-NEXT:    vredsum.vs v8, v8, v16
+; RV64ZVE32-NEXT:    vredsum.vs v9, v10, v16
+; RV64ZVE32-NEXT:    vredsum.vs v10, v12, v16
+; RV64ZVE32-NEXT:    vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT:    vmv.x.s a0, v8
+; RV64ZVE32-NEXT:    vmv.x.s a1, v9
+; RV64ZVE32-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32-NEXT:    vmv.x.s a3, v11
+; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32-NEXT:    vmv.v.x v8, a0
+; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT:    ret
+  %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
+  %248 = insertelement <4 x i32> poison, i32 %247, i64 0
+  %250 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
+  %251 = insertelement <4 x i32> %248, i32 %250, i64 1
+  %252 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
+  %253 = insertelement <4 x i32> %251, i32 %252, i64 2
+  %254 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
+  %255 = insertelement <4 x i32> %253, i32 %254, i64 3
+  ret <4 x i32> %255
+}
+
+define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredmax:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vredmaxu.vs v8, v8, v8
+; RV32-NEXT:    vredmaxu.vs v9, v10, v10
+; RV32-NEXT:    vredmaxu.vs v10, v12, v12
+; RV32-NEXT:    vredmaxu.vs v11, v14, v14
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vmv.x.s a1, v9
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    vmv.x.s a3, v11
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    ret
+;
+; RV64V-ONLY-LABEL: buildvec_vredmax:
+; RV64V-ONLY:       # %bb.0:
+; RV64V-ONLY-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v8, v8
+; RV64V-ONLY-NEXT:    vredmaxu.vs v9, v10, v10
+; RV64V-ONLY-NEXT:    vredmaxu.vs v10, v12, v12
+; RV64V-ONLY-NEXT:    vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
+; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
+; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
+; RV64V-ONLY-NEXT:    vmv.x.s a3, v11
+; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-ONLY-NEXT:    vmv.v.x v8, a0
+; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT:    ret
+;
+; RVA22U64-LABEL: buildvec_vredmax:
+; RVA22U64:       # %bb.0:
+; RVA22U64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT:    vredmaxu.vs v8, v8, v8
+; RVA22U64-NEXT:    vredmaxu.vs v9, v10, v10
+; RVA22U64-NEXT:    vredmaxu.vs v10, v12, v12
+; RVA22U64-NEXT:    vredmaxu.vs v11, v14, v14
+; RVA22U64-NEXT:    vmv.x.s a0, v8
+; RVA22U64-NEXT:    vmv.x.s a1, v9
+; RVA22U64-NEXT:    vmv.x.s a2, v10
+; RVA22U64-NEXT:    slli a1, a1, 32
+; RVA22U64-NEXT:    add.uw a0, a0, a1
+; RVA22U64-NEXT:    vmv.x.s a1, v11
+; RVA22U64-NEXT:    slli a1, a1, 32
+; RVA22U64-NEXT:    add.uw a1, a2, a1
+; RVA22U64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-NEXT:    vmv.v.x v8, a0
+; RVA22U64-NEXT:    vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT:    ret
+;
+; RVA22U64-PACK-LABEL: buildvec_vredmax:
+; RVA22U64-PACK:       # %bb.0:
+; RVA22U64-PACK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT:    vredmaxu.vs v8, v8, v8
+; RVA22U64-PACK-NEXT:    vredmaxu.vs v9, v10, v10
+; RVA22U64-PACK-NEXT:    vredmaxu.vs v10, v12, v12
+; RVA22U64-PACK-NEXT:    vredmaxu.vs v11, v14, v14
+; RVA22U64-PACK-NEXT:    vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT:    vmv.x.s a1, v9
+; RVA22U64-PACK-NEXT:    vmv.x.s a2, v10
+; RVA22U64-PACK-NEXT:    pack a0, a0, a1
+; RVA22U64-PACK-NEXT:    vmv.x.s a1, v11
+; RVA22U64-PACK-NEXT:    pack a1, a2, a1
+; RVA22U64-PACK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-PACK-NEXT:    vmv.v.x v8, a0
+; RVA22U64-PACK-NEXT:    vslide1down.vx v8, v8, a1
+; RVA22U64-PACK-NEXT:    ret
+;
+; RV64ZVE32-LABEL: buildvec_vredmax:
+; RV64ZVE32:       # %bb.0:
+; RV64ZVE32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32-NEXT:    vredmaxu.vs v8, v8, v8
+; RV64ZVE32-NEXT:    vredmaxu.vs v9, v10, v10
+; RV64ZVE32-NEXT:    vredmaxu.vs v10, v12, v12
+; RV64ZVE32-NEXT:    vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT:    vmv.x.s a0, v8
+; RV64ZVE32-NEXT:    vmv.x.s a1, v9
+; RV64ZVE32-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32-NEXT:    vmv.x.s a3, v11
+; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32-NEXT:    vmv.v.x v8, a0
+; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT:    ret
+  %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
+  %248 = insertelement <4 x i32> poison, i32 %247, i64 0
+  %250 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg1)
+  %251 = insertelement <4 x i32> %248, i32 %250, i64 1
+  %252 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg2)
+  %253 = insertelement <4 x i32> %251, i32 %252, i64 2
+  %254 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg3)
+  %255 = insertelement <4 x i32> %253, i32 %254, i64 3
+  ret <4 x i32> %255
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV64: {{.*}}
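
These pre-commit tests pin down the lowering as it stands today for build_vectors whose elements are lane 0 of other vectors (reduction results included): each lane is first moved out with vfmv.f.s/vmv.x.s, the first element is splatted, and the remaining ones are slid down one at a time. The shape the PR title refers to can be distilled to IR like the sketch below; it is hand-written for illustration rather than taken from the patch, and since the RUN lines of these files are not part of this excerpt, an invocation along the lines of llc -mtriple=riscv64 -mattr=+v is assumed.

  ; Every element of the result is lane 0 of some source vector, so the
  ; source of the last extract can seed the result directly.
  define <4 x i32> @first_lanes(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
    %e0 = extractelement <4 x i32> %a, i64 0
    %e1 = extractelement <4 x i32> %b, i64 0
    %e2 = extractelement <4 x i32> %c, i64 0
    %e3 = extractelement <4 x i32> %d, i64 0
    %v0 = insertelement <4 x i32> poison, i32 %e0, i64 0
    %v1 = insertelement <4 x i32> %v0, i32 %e1, i64 1
    %v2 = insertelement <4 x i32> %v1, i32 %e2, i64 2
    %v3 = insertelement <4 x i32> %v2, i32 %e3, i64 3
    ret <4 x i32> %v3
  }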

From c5b56c25754c5fd684ca69aa63cf603b7b235339 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Tue, 19 Aug 2025 13:34:36 -0700
Subject: [PATCH 2/9] [RISCV] Use slideup when the last build_vector operand is
 a reduction

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 85 ++++++++++++++++---
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 30 +++----
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll   | 60 ++++++-------
 .../CodeGen/RISCV/rvv/redundant-vfmvsf.ll     |  7 +-
 4 files changed, 114 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a1db80076530..ce6fc8425856a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4512,33 +4512,94 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
          "Illegal type which will result in reserved encoding");
 
   const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
+  auto getVSlide = [&](bool SlideUp, EVT ContainerVT, SDValue Passthru,
+                       SDValue Vec, SDValue Offset, SDValue Mask,
+                       SDValue VL) -> SDValue {
+    if (SlideUp)
+      return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+                         Mask, VL, Policy);
+    return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+                         Mask, VL, Policy);
+  };
+
+  // General case: splat the first operand and sliding other operands down one
+  // by one to form a vector. Alternatively, if the last operand is an
+  // extraction from a reduction result, we can use the original vector
+  // reduction result as the start value and slide up instead of slide down.
+  // Such that we can avoid the splat.
+  SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
+  SDValue Reduce;
+  bool SlideUp = false;
+  // Find the first first non-undef from the tail.
+  auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
+                                [](SDValue V) { return !V.isUndef(); });
+  if (ItLastNonUndef != Operands.rend()) {
+    using namespace SDPatternMatch;
+    // Check if the last non-undef operand was extracted from a reduction.
+    for (unsigned Opc :
+         {RISCVISD::VECREDUCE_ADD_VL, RISCVISD::VECREDUCE_UMAX_VL,
+          RISCVISD::VECREDUCE_SMAX_VL, RISCVISD::VECREDUCE_UMIN_VL,
+          RISCVISD::VECREDUCE_SMIN_VL, RISCVISD::VECREDUCE_AND_VL,
+          RISCVISD::VECREDUCE_OR_VL, RISCVISD::VECREDUCE_XOR_VL,
+          RISCVISD::VECREDUCE_FADD_VL, RISCVISD::VECREDUCE_SEQ_FADD_VL,
+          RISCVISD::VECREDUCE_FMAX_VL, RISCVISD::VECREDUCE_FMIN_VL}) {
+      SlideUp = sd_match(
+          *ItLastNonUndef,
+          m_ExtractElt(m_AllOf(m_Opc(Opc), m_Value(Reduce)), m_Zero()));
+      if (SlideUp)
+        break;
+    }
+  }
+
+  if (SlideUp) {
+    // Adapt Reduce's type into ContainerVT.
+    if (Reduce.getValueType().getVectorMinNumElements() <
+        ContainerVT.getVectorMinNumElements())
+      Reduce = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Reduce, 0);
+    else
+      Reduce = DAG.getExtractSubvector(DL, ContainerVT, Reduce, 0);
+
+    // Reverse the elements as we're going to slide up from the last element.
+    for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i)
+      std::swap(Operands[i], Operands[N - 1 - i]);
+  }
 
   SDValue Vec;
   UndefCount = 0;
-  for (SDValue V : Op->ops()) {
+  for (SDValue V : Operands) {
     if (V.isUndef()) {
       UndefCount++;
       continue;
     }
 
-    // Start our sequence with a TA splat in the hopes that hardware is able to
-    // recognize there's no dependency on the prior value of our temporary
-    // register.
+    // Start our sequence with either a TA splat or a reduction result in the
+    // hopes that hardware is able to recognize there's no dependency on the
+    // prior value of our temporary register.
     if (!Vec) {
-      Vec = DAG.getSplatVector(VT, DL, V);
-      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      if (SlideUp) {
+        Vec = Reduce;
+      } else {
+        Vec = DAG.getSplatVector(VT, DL, V);
+        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      }
+
       UndefCount = 0;
       continue;
     }
 
     if (UndefCount) {
       const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
-      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
-                          Vec, Offset, Mask, VL, Policy);
+      Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                      Offset, Mask, VL);
       UndefCount = 0;
     }
-    auto OpCode =
-      VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+
+    unsigned OpCode;
+    if (VT.isFloatingPoint())
+      OpCode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+    else
+      OpCode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
+
     if (!VT.isFloatingPoint())
       V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
     Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
@@ -4546,8 +4607,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   }
   if (UndefCount) {
     const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
-    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
-                        Vec, Offset, Mask, VL, Policy);
+    Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                    Offset, Mask, VL);
   }
   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index d4d4ed682f6cf..972e565ba8657 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1890,15 +1890,14 @@ define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x flo
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredusum.vs v9, v10, v16
 ; CHECK-NEXT:    vfredusum.vs v10, v12, v16
-; CHECK-NEXT:    vfredusum.vs v11, v14, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v10
-; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfmv.f.s fa3, v10
+; CHECK-NEXT:    vfredusum.vs v8, v14, v16
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vrgather.vi v9, v8, 0
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa5
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
 ; CHECK-NEXT:    ret
   %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %248 = insertelement <4 x float> poison, float %247, i64 0
@@ -1919,15 +1918,14 @@ define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x flo
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredosum.vs v9, v10, v16
 ; CHECK-NEXT:    vfredosum.vs v10, v12, v16
-; CHECK-NEXT:    vfredosum.vs v11, v14, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v10
-; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfmv.f.s fa3, v10
+; CHECK-NEXT:    vfredosum.vs v8, v14, v16
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vrgather.vi v9, v8, 0
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa5
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
 ; CHECK-NEXT:    ret
   %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %248 = insertelement <4 x float> poison, float %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index d9122676e805f..a02117fdd2833 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3424,16 +3424,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV32-NEXT:    vredsum.vs v8, v8, v16
 ; RV32-NEXT:    vredsum.vs v9, v10, v16
 ; RV32-NEXT:    vredsum.vs v10, v12, v16
-; RV32-NEXT:    vredsum.vs v11, v14, v16
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    vmv.x.s a1, v9
 ; RV32-NEXT:    vmv.x.s a2, v10
-; RV32-NEXT:    vmv.x.s a3, v11
+; RV32-NEXT:    vredsum.vs v8, v14, v16
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1up.vx v9, v8, a2
+; RV32-NEXT:    vslide1up.vx v10, v9, a1
+; RV32-NEXT:    vslide1up.vx v8, v10, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredsum:
@@ -3443,16 +3441,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64V-ONLY-NEXT:    vredsum.vs v8, v8, v16
 ; RV64V-ONLY-NEXT:    vredsum.vs v9, v10, v16
 ; RV64V-ONLY-NEXT:    vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT:    vredsum.vs v11, v14, v16
 ; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
 ; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
 ; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
-; RV64V-ONLY-NEXT:    vmv.x.s a3, v11
+; RV64V-ONLY-NEXT:    vredsum.vs v8, v14, v16
 ; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT:    vmv.v.x v8, a0
-; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT:    vslide1up.vx v9, v8, a2
+; RV64V-ONLY-NEXT:    vslide1up.vx v10, v9, a1
+; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
 ; RV64V-ONLY-NEXT:    ret
 ;
 ; RVA22U64-LABEL: buildvec_vredsum:
@@ -3502,16 +3498,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64ZVE32-NEXT:    vredsum.vs v8, v8, v16
 ; RV64ZVE32-NEXT:    vredsum.vs v9, v10, v16
 ; RV64ZVE32-NEXT:    vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT:    vredsum.vs v11, v14, v16
 ; RV64ZVE32-NEXT:    vmv.x.s a0, v8
 ; RV64ZVE32-NEXT:    vmv.x.s a1, v9
 ; RV64ZVE32-NEXT:    vmv.x.s a2, v10
-; RV64ZVE32-NEXT:    vmv.x.s a3, v11
+; RV64ZVE32-NEXT:    vredsum.vs v8, v14, v16
 ; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT:    vmv.v.x v8, a0
-; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT:    vslide1up.vx v9, v8, a2
+; RV64ZVE32-NEXT:    vslide1up.vx v10, v9, a1
+; RV64ZVE32-NEXT:    vslide1up.vx v8, v10, a0
 ; RV64ZVE32-NEXT:    ret
   %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3531,16 +3525,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV32-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV32-NEXT:    vredmaxu.vs v10, v12, v12
-; RV32-NEXT:    vredmaxu.vs v11, v14, v14
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    vmv.x.s a1, v9
 ; RV32-NEXT:    vmv.x.s a2, v10
-; RV32-NEXT:    vmv.x.s a3, v11
+; RV32-NEXT:    vredmaxu.vs v8, v14, v14
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1up.vx v9, v8, a2
+; RV32-NEXT:    vslide1up.vx v10, v9, a1
+; RV32-NEXT:    vslide1up.vx v8, v10, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredmax:
@@ -3549,16 +3541,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT:    vredmaxu.vs v11, v14, v14
 ; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
 ; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
 ; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
-; RV64V-ONLY-NEXT:    vmv.x.s a3, v11
+; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v14, v14
 ; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT:    vmv.v.x v8, a0
-; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT:    vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT:    vslide1up.vx v9, v8, a2
+; RV64V-ONLY-NEXT:    vslide1up.vx v10, v9, a1
+; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
 ; RV64V-ONLY-NEXT:    ret
 ;
 ; RVA22U64-LABEL: buildvec_vredmax:
@@ -3605,16 +3595,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64ZVE32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64ZVE32-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV64ZVE32-NEXT:    vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT:    vredmaxu.vs v11, v14, v14
 ; RV64ZVE32-NEXT:    vmv.x.s a0, v8
 ; RV64ZVE32-NEXT:    vmv.x.s a1, v9
 ; RV64ZVE32-NEXT:    vmv.x.s a2, v10
-; RV64ZVE32-NEXT:    vmv.x.s a3, v11
+; RV64ZVE32-NEXT:    vredmaxu.vs v8, v14, v14
 ; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT:    vmv.v.x v8, a0
-; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT:    vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT:    vslide1up.vx v9, v8, a2
+; RV64ZVE32-NEXT:    vslide1up.vx v10, v9, a1
+; RV64ZVE32-NEXT:    vslide1up.vx v8, v10, a0
 ; RV64ZVE32-NEXT:    ret
   %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
index da912bf401ec0..821d4240827fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -9,12 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x
 ; CHECK-NEXT:    vfredusum.vs v9, v12, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vfmv.f.s fa5, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfredusum.vs v8, v16, v8
-; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vfredusum.vs v9, v16, v8
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vrgather.vi v8, v9, 0
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa5
 ; CHECK-NEXT:    ret
   %s0 = extractelement <2 x float> %arg0, i64 0
   %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)
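
The updated checks above show the effect of this commit: the last reduction result never leaves its vector register. It seeds the build_vector, the remaining scalars are inserted in front of it with vslide1up/vfslide1up, and the initial vmv.v.x splat (int case) or vrgather.vi copy of lane 0 (fp case) disappears from the checks. Because each slide1up writes its scalar into lane 0 and pushes the existing lanes toward the tail, the operands have to be processed in reverse. A worked trace for a four-element result <s0, s1, s2, red[0]>, with names that are purely illustrative:

  start         v = <red[0], ...>          (the reduction result itself)
  slide1up s2   v = <s2, red[0], ...>
  slide1up s1   v = <s1, s2, red[0], ...>
  slide1up s0   v = <s0, s1, s2, red[0]>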

From 217402a2664de789b43ad1709e4b9785cd418fd0 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Wed, 20 Aug 2025 10:52:49 -0700
Subject: [PATCH 3/9] fixup! Generalize this into non-reduction operations as
 well

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 38 +++++++++----------
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 16 ++++----
 2 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ce6fc8425856a..d7ce0dcbb4295 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4524,40 +4524,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
   // General case: splat the first operand and sliding other operands down one
   // by one to form a vector. Alternatively, if the last operand is an
-  // extraction from a reduction result, we can use the original vector
+  // extraction from element 0 of a vector, we can use the original vector
   // reduction result as the start value and slide up instead of slide down.
   // Such that we can avoid the splat.
   SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
-  SDValue Reduce;
+  SDValue EVec;
   bool SlideUp = false;
   // Find the first first non-undef from the tail.
   auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
                                 [](SDValue V) { return !V.isUndef(); });
   if (ItLastNonUndef != Operands.rend()) {
     using namespace SDPatternMatch;
-    // Check if the last non-undef operand was extracted from a reduction.
-    for (unsigned Opc :
-         {RISCVISD::VECREDUCE_ADD_VL, RISCVISD::VECREDUCE_UMAX_VL,
-          RISCVISD::VECREDUCE_SMAX_VL, RISCVISD::VECREDUCE_UMIN_VL,
-          RISCVISD::VECREDUCE_SMIN_VL, RISCVISD::VECREDUCE_AND_VL,
-          RISCVISD::VECREDUCE_OR_VL, RISCVISD::VECREDUCE_XOR_VL,
-          RISCVISD::VECREDUCE_FADD_VL, RISCVISD::VECREDUCE_SEQ_FADD_VL,
-          RISCVISD::VECREDUCE_FMAX_VL, RISCVISD::VECREDUCE_FMIN_VL}) {
-      SlideUp = sd_match(
-          *ItLastNonUndef,
-          m_ExtractElt(m_AllOf(m_Opc(Opc), m_Value(Reduce)), m_Zero()));
-      if (SlideUp)
-        break;
-    }
+    // Check if the last non-undef operand was an extraction.
+    SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero()));
   }
 
   if (SlideUp) {
-    // Adapt Reduce's type into ContainerVT.
-    if (Reduce.getValueType().getVectorMinNumElements() <
+    MVT EVecContainerVT = EVec.getSimpleValueType();
+    // Make sure the original vector has scalable vector type.
+    if (EVecContainerVT.isFixedLengthVector()) {
+      EVecContainerVT =
+          getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
+      EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
+    }
+
+    // Adapt EVec's type into ContainerVT.
+    if (EVecContainerVT.getVectorMinNumElements() <
         ContainerVT.getVectorMinNumElements())
-      Reduce = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Reduce, 0);
+      EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
     else
-      Reduce = DAG.getExtractSubvector(DL, ContainerVT, Reduce, 0);
+      EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
 
     // Reverse the elements as we're going to slide up from the last element.
     for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i)
@@ -4577,7 +4573,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     // prior value of our temporary register.
     if (!Vec) {
       if (SlideUp) {
-        Vec = Reduce;
+        Vec = EVec;
       } else {
         Vec = DAG.getSplatVector(VT, DL, V);
         Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 972e565ba8657..face756283c33 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1833,15 +1833,13 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d
 ; CHECK-LABEL: buildvec_slideup:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa6
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
+; CHECK-NEXT:    vfslide1up.vf v12, v10, fa1
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %v0 = insertelement <8 x double> poison, double %e0, i64 0
   %v1 = insertelement <8 x double> %v0, double %e1, i64 1
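
With this fixup the start value no longer has to come from a reduction: any vector whose lane 0 feeds the last operand works, as buildvec_slideup above shows. The extracted-from vector only needs to be adapted to the result's container type: a fixed-length source is converted to its scalable container, then widened with an insert_subvector at index 0 when it is narrower than the result container (the <4 x double> feeding an <8 x double> above) or trimmed with an extract_subvector when it is wider. Only lane 0 has to survive that adaptation; every other lane is either overwritten by the slides or tail-agnostic. A sketch of the wider-source case, hand-written and not from the patch (whether a given llc configuration reaches this exact code path also depends on the earlier build_vector special cases in lowerBUILD_VECTOR):

  ; %wide is wider than the result; only its lane 0 is needed, so the
  ; lowering can start from the low subvector of %wide and slide %s up.
  define <2 x float> @wide_source(<8 x float> %wide, float %s) {
    %w0 = extractelement <8 x float> %wide, i64 0
    %v0 = insertelement <2 x float> poison, float %s, i64 0
    %v1 = insertelement <2 x float> %v0, float %w0, i64 1
    ret <2 x float> %v1
  }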

From 3dec8ff38e69fa1400f30f54b58cc2aa92aade6f Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Wed, 20 Aug 2025 13:41:14 -0700
Subject: [PATCH 4/9] fixup! Use std::reverse

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d7ce0dcbb4295..f358c3e77e1db 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4556,8 +4556,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
 
     // Reverse the elements as we're going to slide up from the last element.
-    for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i)
-      std::swap(Operands[i], Operands[N - 1 - i]);
+    std::reverse(Operands.begin(), Operands.end());
   }
 
   SDValue Vec;

From 1d8b13ead5ed41753cbf06c1aedd6e02eca63c68 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min@myhsu.dev>
Date: Wed, 20 Aug 2025 14:37:41 -0700
Subject: [PATCH 5/9] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Co-authored-by: Craig Topper <craig.topper@sifive.com>
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f358c3e77e1db..76ed91f26599b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4522,7 +4522,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                          Mask, VL, Policy);
   };
 
-  // General case: splat the first operand and sliding other operands down one
+  // General case: splat the first operand and slide other operands down one
   // by one to form a vector. Alternatively, if the last operand is an
   // extraction from element 0 of a vector, we can use the original vector
   // reduction result as the start value and slide up instead of slide down.

From ac83561053cc1056f20294cf544c8477874a8f5a Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Thu, 21 Aug 2025 11:05:24 -0700
Subject: [PATCH 6/9] fixup! Address review comments

Co-Authored-By: Luke Lau <luke@igalia.com>
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  7 +++---
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 22 +++++++++++++++++++
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 76ed91f26599b..4e697220cacee 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4524,9 +4524,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
   // General case: splat the first operand and slide other operands down one
   // by one to form a vector. Alternatively, if the last operand is an
-  // extraction from element 0 of a vector, we can use the original vector
-  // reduction result as the start value and slide up instead of slide down.
-  // Such that we can avoid the splat.
+  // extraction from element 0 of a vector, we can use that vector as the start
+  // value and slide up instead of slide down. Such that we can avoid the splat.
   SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
   SDValue EVec;
   bool SlideUp = false;
@@ -4567,7 +4566,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       continue;
     }
 
-    // Start our sequence with either a TA splat or a reduction result in the
+    // Start our sequence with either a TA splat or extract source in the
     // hopes that hardware is able to recognize there's no dependency on the
     // prior value of our temporary register.
     if (!Vec) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index face756283c33..b98a8521964e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1853,6 +1853,28 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d
   ret <8 x double> %v7
 }
 
+define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup_trailing_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
+; CHECK-NEXT:    vfslide1up.vf v12, v10, fa1
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %e5 = extractelement <4 x double> %v, i64 0
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %v6 = insertelement <8 x double> %v5, double poison, i64 6
+  %v7 = insertelement <8 x double> %v6, double poison, i64 7
+  ret <8 x double> %v7
+}
+
 ; Negative test for slideup lowering where the extract_element was not build_vector's last operand.
 define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) {
 ; CHECK-LABEL: buildvec_slideup_not_last_element:

From ed3f4566bc3bad7be9ea686413a5f762e4af27b3 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Fri, 22 Aug 2025 14:20:02 -0700
Subject: [PATCH 7/9] fixup! Address review comments and add more tests

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 34 +++++++--------
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 42 +++++++++++++++++++
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4e697220cacee..86958201f4921 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4512,15 +4512,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
          "Illegal type which will result in reserved encoding");
 
   const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
-  auto getVSlide = [&](bool SlideUp, EVT ContainerVT, SDValue Passthru,
-                       SDValue Vec, SDValue Offset, SDValue Mask,
-                       SDValue VL) -> SDValue {
-    if (SlideUp)
-      return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
-                         Mask, VL, Policy);
-    return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
-                         Mask, VL, Policy);
-  };
 
   // General case: splat the first operand and slide other operands down one
   // by one to form a vector. Alternatively, if the last operand is an
@@ -4529,6 +4520,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
   SDValue EVec;
   bool SlideUp = false;
+  auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
+                       SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
+    if (SlideUp)
+      return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+                         Mask, VL, Policy);
+    return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+                         Mask, VL, Policy);
+  };
+
   // Find the first first non-undef from the tail.
   auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
                                 [](SDValue V) { return !V.isUndef(); });
@@ -4583,26 +4583,26 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
     if (UndefCount) {
       const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
-      Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
-                      Offset, Mask, VL);
+      Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+                      VL);
       UndefCount = 0;
     }
 
-    unsigned OpCode;
+    unsigned Opcode;
     if (VT.isFloatingPoint())
-      OpCode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+      Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
     else
-      OpCode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
+      Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
 
     if (!VT.isFloatingPoint())
       V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
-    Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+    Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                       V, Mask, VL);
   }
   if (UndefCount) {
     const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
-    Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
-                    Offset, Mask, VL);
+    Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+                    VL);
   }
   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index b98a8521964e7..cd67fc31119f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1853,6 +1853,48 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d
   ret <8 x double> %v7
 }
 
+define <8 x double> @buildvec_slideup_leading_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup_leading_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa1
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
+; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    ret
+  %v2 = insertelement <8 x double> poison, double %e0, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e1, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e2, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e3, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e4, i64 6
+  %e5 = extractelement <4 x double> %v, i64 0
+  %v7 = insertelement <8 x double> %v6, double %e5, i64 7
+  ret <8 x double> %v7
+}
+
+define <8 x double> @buildvec_slideup_mid_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_slideup_mid_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
+; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa1
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v4 = insertelement <8 x double> %v1, double %e2, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e3, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e4, i64 6
+  %e5 = extractelement <4 x double> %v, i64 0
+  %v7 = insertelement <8 x double> %v6, double %e5, i64 7
+  ret <8 x double> %v7
+}
+
 define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
 ; CHECK-LABEL: buildvec_slideup_trailing_undef:
 ; CHECK:       # %bb.0:
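
Together with the earlier trailing-undef test, the new leading- and mid-undef tests pin down how runs of undef operands interact with the slide-up path: a run of N undefs emits no slide1up at all, it is only counted and then materialized as a single vslideup.vi by N once the next defined element (or the end of the reversed operand list) is reached, which is the vslideup.vi v8, v10, 2 in the checks above. A worked trace for the mid-undef case <e0, e1, undef, undef, e2, e3, e4, x[0]>, with illustrative names, processed in reverse:

  start                 v = <x[0], ...>                    (x seeds the result)
  slide1up e4, e3, e2   v = <e2, e3, e4, x[0], ...>
  two undefs            nothing emitted, just counted
  vslideup.vi by 2      v = <?, ?, e2, e3, e4, x[0], ...>
  slide1up e1, e0       v = <e0, e1, ?, ?, e2, e3, e4, x[0]>

Trailing undefs of the original order become leading operands after the reversal and are skipped before the start value is picked, which is why buildvec_slideup_trailing_undef needs no extra slide at all.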

From 41f1a97069a224e719e819a3ea9ac8857334ffc1 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Thu, 28 Aug 2025 11:14:55 -0700
Subject: [PATCH 8/9] fixup! Limit the condition to build_vector with *all*
 operands being extraction from first element

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  26 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 233 +++++++++---------
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll   |  24 +-
 3 files changed, 143 insertions(+), 140 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 86958201f4921..562fbae776051 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4514,9 +4514,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
 
   // General case: splat the first operand and slide other operands down one
-  // by one to form a vector. Alternatively, if the last operand is an
-  // extraction from element 0 of a vector, we can use that vector as the start
-  // value and slide up instead of slide down. Such that we can avoid the splat.
+  // by one to form a vector. Alternatively, if every operand is an
+  // extraction from element 0 of a vector, we use that vector from the last
+  // extraction as the start value and slide up instead of slide down. Such that
+  // (1) we can avoid the initial splat (2) we can turn those vslide1up into
+  // vslideup of 1 later and eliminate the vector to scalar movement, which is
+  // something we cannot do with vslide1down/vslidedown.
+  // Of course, using vslide1up/vslideup might increase the register pressure,
+  // and that's why we conservatively limit to cases where every operands is an
+  // extraction from first element.
   SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
   SDValue EVec;
   bool SlideUp = false;
@@ -4529,13 +4535,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                          Mask, VL, Policy);
   };
 
-  // Find the first first non-undef from the tail.
-  auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
-                                [](SDValue V) { return !V.isUndef(); });
-  if (ItLastNonUndef != Operands.rend()) {
+  // The reason we don't use all_of here is because we're also capturing EVec
+  // from the last non-undef operand. If the std::execution_policy of the
+  // underlying std::all_of is anything but std::sequenced_policy we might
+  // capture the wrong EVec.
+  for (SDValue V : Operands) {
     using namespace SDPatternMatch;
-    // Check if the last non-undef operand was an extraction.
-    SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero()));
+    SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
+    if (!SlideUp)
+      break;
   }
 
   if (SlideUp) {
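
In other words, after this fixup the slide-up path is only taken when every defined build_vector operand is lane 0 of some vector; as soon as one ordinary scalar shows up, the lowering falls back to the splat + vslide1down sequence. That is why the mixed scalar/extract tests are dropped from the fp file below and replaced with reduction-based variants in which every operand is a lane 0 extract. A contrasting sketch, hand-written and not taken from the test diff:

  ; Qualifies for the slide-up path: both elements are lane 0 extracts.
  define <2 x float> @all_lane0(<4 x float> %a, <4 x float> %b) {
    %e0 = extractelement <4 x float> %a, i64 0
    %e1 = extractelement <4 x float> %b, i64 0
    %v0 = insertelement <2 x float> poison, float %e0, i64 0
    %v1 = insertelement <2 x float> %v0, float %e1, i64 1
    ret <2 x float> %v1
  }

  ; No longer qualifies: %s is a plain scalar, so this keeps the
  ; splat + vfslide1down lowering.
  define <2 x float> @mixed(<4 x float> %b, float %s) {
    %e1 = extractelement <4 x float> %b, i64 0
    %v0 = insertelement <2 x float> poison, float %s, i64 0
    %v1 = insertelement <2 x float> %v0, float %e1, i64 1
    ret <2 x float> %v1
  }
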
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index cd67fc31119f7..aa3b9abe3a7aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1829,123 +1829,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
   ret <8 x double> %v7
 }
 
-define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) {
-; CHECK-LABEL: buildvec_slideup:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa6
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
-; CHECK-NEXT:    vfslide1up.vf v12, v10, fa1
-; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0
-; CHECK-NEXT:    ret
-  %v0 = insertelement <8 x double> poison, double %e0, i64 0
-  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
-  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
-  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
-  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
-  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
-  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
-  %e7 = extractelement <4 x double> %v, i64 0
-  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
-  ret <8 x double> %v7
-}
-
-define <8 x double> @buildvec_slideup_leading_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
-; CHECK-LABEL: buildvec_slideup_leading_undef:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa1
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
-; CHECK-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-NEXT:    ret
-  %v2 = insertelement <8 x double> poison, double %e0, i64 2
-  %v3 = insertelement <8 x double> %v2, double %e1, i64 3
-  %v4 = insertelement <8 x double> %v3, double %e2, i64 4
-  %v5 = insertelement <8 x double> %v4, double %e3, i64 5
-  %v6 = insertelement <8 x double> %v5, double %e4, i64 6
-  %e5 = extractelement <4 x double> %v, i64 0
-  %v7 = insertelement <8 x double> %v6, double %e5, i64 7
-  ret <8 x double> %v7
-}
-
-define <8 x double> @buildvec_slideup_mid_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
-; CHECK-LABEL: buildvec_slideup_mid_undef:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
-; CHECK-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa1
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0
-; CHECK-NEXT:    ret
-  %v0 = insertelement <8 x double> poison, double %e0, i64 0
-  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
-  %v4 = insertelement <8 x double> %v1, double %e2, i64 4
-  %v5 = insertelement <8 x double> %v4, double %e3, i64 5
-  %v6 = insertelement <8 x double> %v5, double %e4, i64 6
-  %e5 = extractelement <4 x double> %v, i64 0
-  %v7 = insertelement <8 x double> %v6, double %e5, i64 7
-  ret <8 x double> %v7
-}
-
-define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
-; CHECK-LABEL: buildvec_slideup_trailing_undef:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa2
-; CHECK-NEXT:    vfslide1up.vf v12, v10, fa1
-; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0
-; CHECK-NEXT:    ret
-  %v0 = insertelement <8 x double> poison, double %e0, i64 0
-  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
-  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
-  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
-  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
-  %e5 = extractelement <4 x double> %v, i64 0
-  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
-  %v6 = insertelement <8 x double> %v5, double poison, i64 6
-  %v7 = insertelement <8 x double> %v6, double poison, i64 7
-  ret <8 x double> %v7
-}
-
-; Negative test for slideup lowering where the extract_element was not build_vector's last operand.
-define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) {
-; CHECK-LABEL: buildvec_slideup_not_last_element:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT:    ret
-  %v0 = insertelement <8 x double> poison, double %e0, i64 0
-  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
-  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
-  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
-  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
-  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
-  %e6 = extractelement <4 x double> %v, i64 0
-  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
-  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
-  ret <8 x double> %v7
-}
-
-define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
-; CHECK-LABEL: buildvec_vfredusum:
+define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum_slideup:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
@@ -1972,8 +1857,118 @@ define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x flo
   ret <4 x float> %255
 }
 
-define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
-; CHECK-LABEL: buildvec_vfredosum:
+define <8 x float> @buildvec_vfredusum_slideup_leading_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum_slideup_leading_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfredusum.vs v9, v8, v16
+; CHECK-NEXT:    vfredusum.vs v10, v10, v16
+; CHECK-NEXT:    vfredusum.vs v11, v12, v16
+; CHECK-NEXT:    vfredusum.vs v8, v14, v16
+; CHECK-NEXT:    vfmv.f.s fa5, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v10
+; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa4
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa5
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    ret
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 4
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %255 = insertelement <8 x float> %253, float %254, i64 5
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 6
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 7
+  ret <8 x float> %259
+}
+
+define <8 x float> @buildvec_vfredusum_slideup_trailing_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum_slideup_trailing_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfredusum.vs v9, v8, v16
+; CHECK-NEXT:    vfredusum.vs v10, v10, v16
+; CHECK-NEXT:    vfredusum.vs v11, v12, v16
+; CHECK-NEXT:    vfredusum.vs v8, v14, v16
+; CHECK-NEXT:    vfmv.f.s fa5, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v10
+; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v12, v10, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa5
+; CHECK-NEXT:    ret
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 0
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %255 = insertelement <8 x float> %253, float %254, i64 1
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 2
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 3
+  ret <8 x float> %259
+}
+
+; Negative test checking that we generate slideup only when all build_vector operands are extractions from the first vector element.
+define <8 x float> @buildvec_vfredusum_slideup_not_extract_first(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum_slideup_not_extract_first:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v10
+; CHECK-NEXT:    vfredusum.vs v9, v12, v10
+; CHECK-NEXT:    vfredusum.vs v10, v14, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v10
+; CHECK-NEXT:    vrgather.vi v10, v8, 0
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vslidedown.vi v8, v8, 4
+; CHECK-NEXT:    ret
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 0
+  %255 = insertelement <8 x float> %253, float %start, i64 1
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 2
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 3
+  ret <8 x float> %259
+}
+
+define <8 x float> @buildvec_vfredusum_slideup_mid_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum_slideup_mid_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfredusum.vs v9, v8, v16
+; CHECK-NEXT:    vfredusum.vs v10, v10, v16
+; CHECK-NEXT:    vfredusum.vs v11, v12, v16
+; CHECK-NEXT:    vfredusum.vs v8, v14, v16
+; CHECK-NEXT:    vfmv.f.s fa5, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v10
+; CHECK-NEXT:    vfmv.f.s fa3, v11
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa3
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    ret
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 0
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %255 = insertelement <8 x float> %253, float %254, i64 1
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 6
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 7
+  ret <8 x float> %259
+}
+
+define <4 x float> @buildvec_vfredosum_slideup(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredosum_slideup:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index a02117fdd2833..eedf19c38766b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3416,8 +3416,8 @@ define <4 x i1> @buildvec_i1_splat(i1 %e1) {
   ret <4 x i1> %v4
 }
 
-define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
-; RV32-LABEL: buildvec_vredsum:
+define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredsum_slideup:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.s.x v16, zero
@@ -3434,7 +3434,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV32-NEXT:    vslide1up.vx v8, v10, a0
 ; RV32-NEXT:    ret
 ;
-; RV64V-ONLY-LABEL: buildvec_vredsum:
+; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:
 ; RV64V-ONLY:       # %bb.0:
 ; RV64V-ONLY-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64V-ONLY-NEXT:    vmv.s.x v16, zero
@@ -3451,7 +3451,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
 ; RV64V-ONLY-NEXT:    ret
 ;
-; RVA22U64-LABEL: buildvec_vredsum:
+; RVA22U64-LABEL: buildvec_vredsum_slideup:
 ; RVA22U64:       # %bb.0:
 ; RVA22U64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RVA22U64-NEXT:    vmv.s.x v16, zero
@@ -3472,7 +3472,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RVA22U64-NEXT:    vslide1down.vx v8, v8, a1
 ; RVA22U64-NEXT:    ret
 ;
-; RVA22U64-PACK-LABEL: buildvec_vredsum:
+; RVA22U64-PACK-LABEL: buildvec_vredsum_slideup:
 ; RVA22U64-PACK:       # %bb.0:
 ; RVA22U64-PACK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RVA22U64-PACK-NEXT:    vmv.s.x v16, zero
@@ -3491,7 +3491,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RVA22U64-PACK-NEXT:    vslide1down.vx v8, v8, a1
 ; RVA22U64-PACK-NEXT:    ret
 ;
-; RV64ZVE32-LABEL: buildvec_vredsum:
+; RV64ZVE32-LABEL: buildvec_vredsum_slideup:
 ; RV64ZVE32:       # %bb.0:
 ; RV64ZVE32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64ZVE32-NEXT:    vmv.s.x v16, zero
@@ -3518,8 +3518,8 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
   ret <4 x i32> %255
 }
 
-define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
-; RV32-LABEL: buildvec_vredmax:
+define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredmax_slideup:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
@@ -3535,7 +3535,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV32-NEXT:    vslide1up.vx v8, v10, a0
 ; RV32-NEXT:    ret
 ;
-; RV64V-ONLY-LABEL: buildvec_vredmax:
+; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:
 ; RV64V-ONLY:       # %bb.0:
 ; RV64V-ONLY-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v8, v8
@@ -3551,7 +3551,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
 ; RV64V-ONLY-NEXT:    ret
 ;
-; RVA22U64-LABEL: buildvec_vredmax:
+; RVA22U64-LABEL: buildvec_vredmax_slideup:
 ; RVA22U64:       # %bb.0:
 ; RVA22U64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RVA22U64-NEXT:    vredmaxu.vs v8, v8, v8
@@ -3571,7 +3571,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RVA22U64-NEXT:    vslide1down.vx v8, v8, a1
 ; RVA22U64-NEXT:    ret
 ;
-; RVA22U64-PACK-LABEL: buildvec_vredmax:
+; RVA22U64-PACK-LABEL: buildvec_vredmax_slideup:
 ; RVA22U64-PACK:       # %bb.0:
 ; RVA22U64-PACK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RVA22U64-PACK-NEXT:    vredmaxu.vs v8, v8, v8
@@ -3589,7 +3589,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RVA22U64-PACK-NEXT:    vslide1down.vx v8, v8, a1
 ; RVA22U64-PACK-NEXT:    ret
 ;
-; RV64ZVE32-LABEL: buildvec_vredmax:
+; RV64ZVE32-LABEL: buildvec_vredmax_slideup:
 ; RV64ZVE32:       # %bb.0:
 ; RV64ZVE32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64ZVE32-NEXT:    vredmaxu.vs v8, v8, v8

>From 83f17473e3a6967cfc7f54f445d12779536104f8 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 2 Sep 2025 09:42:42 -0700
Subject: [PATCH 9/9] fixup! Fix code comments

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 562fbae776051..2149739443650 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4521,8 +4521,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   // vslideup of 1 later and eliminate the vector to scalar movement, which is
   // something we cannot do with vslide1down/vslidedown.
   // Of course, using vslide1up/vslideup might increase the register pressure,
-  // and that's why we conservatively limit to cases where every operands is an
-  // extraction from first element.
+  // and that's why we conservatively limit to cases where every operand is an
+  // extraction from the first element.
   SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
   SDValue EVec;
   bool SlideUp = false;

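For reference, here is a minimal sketch of the operand screen that the comment
above describes: the vslide1up/vslideup path is only taken when every non-undef
build_vector operand is an EXTRACT_VECTOR_ELT of lane 0, so each scalar can be
read with a cheap vfmv.f.s/vmv.x.s. The sketch is illustrative only; the helper
name allOperandsExtractLaneZero is hypothetical, and the actual lowering folds
this check inline into lowerBUILD_VECTOR around the Operands/EVec/SlideUp state
visible in the hunk above.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Hypothetical helper, not the code in this patch: returns true if every
// defined operand of the BUILD_VECTOR extracts element 0 of some source
// vector, which is the shape of build_vector the slideup lowering targets.
static bool allOperandsExtractLaneZero(ArrayRef<SDValue> Operands) {
  for (SDValue Op : Operands) {
    // Undef/poison lanes are tolerated, as in the *_undef tests above; they
    // are simply skipped by the slide sequence.
    if (Op.isUndef())
      continue;
    // Every other operand must be an extract of the first element, i.e.
    // extract_vector_elt(X, 0).
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isNullConstant(Op.getOperand(1)))
      return false;
  }
  return true;
}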