[llvm] [RISCV] Remove last use of experimental.vp.splat in RISCVCodeGenPrepare. NFCI (PR #170543)

Wed Dec 3 11:46:32 PST 2025

https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/170543

Stacked on #170539

RISCVCodeGenPrepare is the last user of the vp.splat intrinsic, where it uses it to expand a zero strided load into a scalar load and splat. This replaces it with a regular splat followed by a vp_merge to set the lanes past EVL as poison. We need to set the EVL here because RISCVISelDAGToDAG will try and recombine it back into a zero strided load, and we want to preserve the original VL.

We need to set

>From 9bdcb879c1922a50f999eeb1a15f2a0fd98052e4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 4 Dec 2025 03:07:43 +0800
Subject: [PATCH 1/5] Precommit tests

---
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll     | 13 ++++++++++++
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.mir    | 21 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index c2638127e47af..7a8064107241f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -245,3 +245,16 @@ define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i6
   %b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %a, iXLen %avl)
   ret <vscale x 1 x i64> %b
 }
+
+define <vscale x 4 x float> @commute_vfmadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; CHECK-LABEL: commute_vfmadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfmadd.vv v10, v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %passthru, iXLen 7, iXLen %vl, iXLen 3)
+  %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
+  ret <vscale x 4 x float> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index 95232e734bb18..a9b4622f8b5f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -168,3 +168,24 @@ body: |
     %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
     %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
 ...
+---
+name: commute_vfmadd
+body: |
+  bb.0:
+    liveins: $x8, $v0, $v8, $v9, $v10
+    ; CHECK-LABEL: name: commute_vfmadd
+    ; CHECK: liveins: $x8, $v0, $v8, $v9, $v10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+    ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+    ; CHECK-NEXT: %x:vr = COPY $v9
+    ; CHECK-NEXT: %y:vr = COPY $v10
+    ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, %avl, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+    ; CHECK-NEXT: %vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
+    %avl:gprnox0 = COPY $x8
+    %passthru:vrnov0 = COPY $v8
+    %x:vr = COPY $v9
+    %y:vr = COPY $v10
+    %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, -1, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+    %vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
+...

>From f24810a52d3ff68852027395025172ba9bc8a2ef Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 4 Dec 2025 03:08:33 +0800
Subject: [PATCH 2/5] [RISCV] Commute Src in foldVMV_V_V

In #156499 we taught the vmerge peephole to commute operands so that the passthru operands lined up. We can do the same for the vmv.v.v peephole, which allows us fold more vmv.v.vs away.

This is needed to prevent a regression in an upcoming patch that adds a combine for vmerge.vvm to vmv.v.v.
---
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 23 +++++++++++++++++--
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll     |  6 ++---
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.mir    |  3 +--
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 6ddca4a3e0909..a5385be0c011c 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -651,11 +651,23 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   if (!hasSameEEW(MI, *Src))
     return false;
 
+  std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
   // Src needs to have the same passthru as VMV_V_V
   MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
   if (SrcPassthru.getReg().isValid() &&
-      SrcPassthru.getReg() != Passthru.getReg())
-    return false;
+      SrcPassthru.getReg() != Passthru.getReg()) {
+    // If Src's passthru != Passthru, check if it uses Passthru in another
+    // operand and try to commute it.
+    int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
+    if (OtherIdx == -1)
+      return false;
+    unsigned OpIdx1 = OtherIdx;
+    unsigned OpIdx2 = Src->getNumExplicitDefs();
+    if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
+      return false;
+    NeedsCommute = {OpIdx1, OpIdx2};
+  }
 
   // Src VL will have already been reduced if legal (see tryToReduceVL),
   // so we don't need to handle a smaller source VL here.  However, the
@@ -668,6 +680,13 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   if (!ensureDominates(Passthru, *Src))
     return false;
 
+  if (NeedsCommute) {
+    auto [OpIdx1, OpIdx2] = *NeedsCommute;
+    [[maybe_unused]] bool Commuted =
+        TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
+    assert(Commuted && "Failed to commute Src?");
+  }
+
   if (SrcPassthru.getReg() != Passthru.getReg()) {
     SrcPassthru.setReg(Passthru.getReg());
     // If Src is masked then its passthru needs to be in VRNoV0.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 7a8064107241f..698d47f3be720 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -249,10 +249,8 @@ define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i6
 define <vscale x 4 x float> @commute_vfmadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
 ; CHECK-LABEL: commute_vfmadd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v10, v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vfmacc.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %passthru, iXLen 7, iXLen %vl, iXLen 3)
   %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index a9b4622f8b5f7..68e74ff6ba05b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -180,8 +180,7 @@ body: |
     ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
     ; CHECK-NEXT: %x:vr = COPY $v9
     ; CHECK-NEXT: %y:vr = COPY $v10
-    ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, %avl, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
-    ; CHECK-NEXT: %vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMACC_VV_M1_E32 %passthru, %y, %x, 7, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit $frm
     %avl:gprnox0 = COPY $x8
     %passthru:vrnov0 = COPY $v8
     %x:vr = COPY $v9

>From 4ed47cb67840ad5d4ce230695126d541d052c386 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 4 Dec 2025 02:10:18 +0800
Subject: [PATCH 3/5] Precommit tests

---
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 53 +++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 49 +++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 7968c5190eb01..f2481974abfa7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1353,3 +1353,56 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
   %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
   ret <32 x double> %v
 }
+
+define <4 x i32> @splat_v4i32(i32 %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i32> poison, i32 %x, i32 0
+  %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> %splat, <4 x i32> poison, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x float> @splat_v4f32(float %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x float> poison, float %x, i32 0
+  %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer
+  %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> %splat, <4 x float> poison, i32 %evl)
+  ret <4 x float> %v
+}
+
+define <4 x i32> @splat_v4i32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4i32_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> splat (i32 1), <4 x i32> poison, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x float> @splat_v4f32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4f32_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    lui a1, 270976
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vmerge.vxm v8, v8, a1, v0
+; CHECK-NEXT:    ret
+  %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> splat (float 42.0), <4 x float> poison, i32 %evl)
+  ret <4 x float> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 03697aafea45d..7b19985b98582 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1663,3 +1663,52 @@ define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double
   %v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
   ret <vscale x 8 x double> %v
 }
+
+define <vscale x 2 x i32> @splat_nxv2i32(i32 %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> %splat, <vscale x 2 x i32> poison, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x float> @splat_nxv2f32(float %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x float> poison, float %x, i32 0
+  %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> %splat, <vscale x 2 x float> poison, i32 %evl)
+  ret <vscale x 2 x float> %v
+}
+
+define <vscale x 2 x i32> @splat_nxv2i32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2i32_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i32> poison, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x float> @splat_nxv2f32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2f32_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    lui a0, 270976
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> poison, i32 %evl)
+  ret <vscale x 2 x float> %v
+}

>From 1f1fef1f90e096f0e39bc3954438f8db250f85c4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 4 Dec 2025 03:15:55 +0800
Subject: [PATCH 4/5] [RISCV] Combine vmerge_vl allones -> vmv_v_v, vmv_v_v
 splat(x) -> vmv_v_x

Stacked on #170536

An upcoming patch aims to remove the last use of @llvm.experimental.vp.splat in RISCVCodegenPrepare by replacing it with a vp_merge of a regular splat.

A vp_merge will get lowered to vmerge_vl, and if we combine vmerge_vl of a splat to vmv_v_x we can get the same behaviour as the vp.splat intrinsic.

This adds the two combines needed. It was easier to do the combines on _vl nodes rather than on vp_merge itself, since the types are already legal for _vl nodes.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 38 +++++++++++++++++++
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 16 ++------
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 14 +++----
 3 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab2652eac3823..899871edc9f7b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21872,6 +21872,44 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return N->getOperand(0);
     break;
   }
+  case RISCVISD::VMERGE_VL: {
+    // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
+    SDValue Mask = N->getOperand(0);
+    SDValue True = N->getOperand(1);
+    SDValue Passthru = N->getOperand(3);
+    SDValue VL = N->getOperand(4);
+
+    // Fixed vectors are wrapped in scalable containers, unwrap them.
+    using namespace SDPatternMatch;
+    SDValue SubVec;
+    if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+      Mask = SubVec;
+
+    if (!isOneOrOneSplat(Mask))
+      break;
+
+    return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getSimpleValueType(0),
+                       Passthru, True, VL);
+  }
+  case RISCVISD::VMV_V_V_VL: {
+    // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
+    SDValue Passthru = N->getOperand(0);
+    SDValue Src = N->getOperand(1);
+    SDValue VL = N->getOperand(2);
+
+    // Fixed vectors are wrapped in scalable containers, unwrap them.
+    using namespace SDPatternMatch;
+    SDValue SubVec;
+    if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+      Src = SubVec;
+
+    SDValue SplatVal = DAG.getSplatValue(Src);
+    if (!SplatVal)
+      break;
+    MVT VT = N->getSimpleValueType(0);
+    return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
+                            Subtarget);
+  }
   case RISCVISD::VSLIDEDOWN_VL:
   case RISCVISD::VSLIDEUP_VL:
     if (N->getOperand(1)->isUndef())
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index f2481974abfa7..0bacb5c26cb4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1357,10 +1357,8 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
 define <4 x i32> @splat_v4i32(i32 %x, i32 zeroext %evl) {
 ; CHECK-LABEL: splat_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %x, i32 0
   %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -1371,10 +1369,8 @@ define <4 x i32> @splat_v4i32(i32 %x, i32 zeroext %evl) {
 define <4 x float> @splat_v4f32(float %x, i32 zeroext %evl) {
 ; CHECK-LABEL: splat_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x float> poison, float %x, i32 0
   %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer
@@ -1385,10 +1381,8 @@ define <4 x float> @splat_v4f32(float %x, i32 zeroext %evl) {
 define <4 x i32> @splat_v4i32_const(i32 zeroext %evl) {
 ; CHECK-LABEL: splat_v4i32_const:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv.v.i v8, 1
 ; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> splat (i32 1), <4 x i32> poison, i32 %evl)
   ret <4 x i32> %v
@@ -1397,11 +1391,9 @@ define <4 x i32> @splat_v4i32_const(i32 zeroext %evl) {
 define <4 x float> @splat_v4f32_const(i32 zeroext %evl) {
 ; CHECK-LABEL: splat_v4f32_const:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    lui a1, 270976
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmerge.vxm v8, v8, a1, v0
+; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    ret
   %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> splat (float 42.0), <4 x float> poison, i32 %evl)
   ret <4 x float> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 7b19985b98582..f92ee37051840 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1668,8 +1668,7 @@ define <vscale x 2 x i32> @splat_nxv2i32(i32 %x, i32 zeroext %evl) {
 ; CHECK-LABEL: splat_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1681,8 +1680,7 @@ define <vscale x 2 x float> @splat_nxv2f32(float %x, i32 zeroext %evl) {
 ; CHECK-LABEL: splat_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x float> poison, float %x, i32 0
   %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -1694,8 +1692,7 @@ define <vscale x 2 x i32> @splat_nxv2i32_const(i32 zeroext %evl) {
 ; CHECK-LABEL: splat_nxv2i32_const:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv.v.i v8, 1
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i32> poison, i32 %evl)
   ret <vscale x 2 x i32> %v
@@ -1704,10 +1701,9 @@ define <vscale x 2 x i32> @splat_nxv2i32_const(i32 zeroext %evl) {
 define <vscale x 2 x float> @splat_nxv2f32_const(i32 zeroext %evl) {
 ; CHECK-LABEL: splat_nxv2f32_const:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 270976
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    lui a0, 270976
-; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> poison, i32 %evl)
   ret <vscale x 2 x float> %v

>From cf508aa18576e2c3e56a566a864c4e9a889e562d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 4 Dec 2025 03:40:46 +0800
Subject: [PATCH 5/5] [RISCV] Remove last use of experimental.vp.splat in
 RISCVCodeGenPrepare. NFCI

Stacked on #170539

RISCVCodeGenPrepare is the last user of the vp.splat intrinsic, where it uses it to expand a zero strided load into a scalar load and splat. This replaces it with a regular splat followed by a vp_merge to set the lanes past EVL as poison. We need to set the EVL here because RISCVISelDAGToDAG will try and recombine it back into a zero strided load, and we want to preserve the original VL.

We need to set
---
 llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index ab450f9c4a61d..1ee4c66a5bde5 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -271,8 +271,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
   IRBuilder<> Builder(&II);
   Type *STy = VTy->getElementType();
   Value *Val = Builder.CreateLoad(STy, BasePtr);
-  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
-                                       {Val, II.getOperand(2), VL});
+  Value *Res = Builder.CreateIntrinsic(
+      Intrinsic::vp_merge, VTy,
+      {II.getOperand(2), Builder.CreateVectorSplat(VTy->getElementCount(), Val),
+       PoisonValue::get(VTy), VL});
 
   II.replaceAllUsesWith(Res);
   II.eraseFromParent();