[llvm] [RISCV] Remove last use of experimental.vp.splat in RISCVCodeGenPrepare. NFCI (PR #170543)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 11:47:03 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
Stacked on #170539
RISCVCodeGenPrepare is the last user of the vp.splat intrinsic, which it uses to expand a zero-strided VP load into a scalar load plus splat. This patch replaces the vp.splat with a regular splat followed by a vp.merge that leaves the lanes past EVL poison. The EVL needs to be preserved here because RISCVISelDAGToDAG will try to recombine the splat back into a zero-strided load, and we want to keep the original VL.
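Concretely, the expansion in expandVPStrideLoad now looks like this. This is a sketch with illustrative types and placeholder names (`%p`, `%m`, `%evl`); the real code reuses the strided load's mask and EVL operands and builds the splat with CreateVectorSplat:

```llvm
; Before: a stride-0 VP strided load, where every active lane reads %p.
%v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(
         ptr %p, i64 0, <vscale x 2 x i1> %m, i32 %evl)

; After: a scalar load and a regular splat, wrapped in a vp.merge so that
; lanes past %evl are poison and the original EVL survives to ISel.
%s = load i32, ptr %p
%head = insertelement <vscale x 2 x i32> poison, i32 %s, i64 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(
         <vscale x 2 x i1> %m, <vscale x 2 x i32> %splat,
         <vscale x 2 x i32> poison, i32 %evl)
```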
---
Full diff: https://github.com/llvm/llvm-project/pull/170543.diff
7 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp (+4-2)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+38)
- (modified) llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (+21-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll (+45)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll (+11)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir (+20)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll (+45)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index ab450f9c4a61d..1ee4c66a5bde5 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -271,8 +271,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
IRBuilder<> Builder(&II);
Type *STy = VTy->getElementType();
Value *Val = Builder.CreateLoad(STy, BasePtr);
- Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
- {Val, II.getOperand(2), VL});
+ Value *Res = Builder.CreateIntrinsic(
+ Intrinsic::vp_merge, VTy,
+ {II.getOperand(2), Builder.CreateVectorSplat(VTy->getElementCount(), Val),
+ PoisonValue::get(VTy), VL});
II.replaceAllUsesWith(Res);
II.eraseFromParent();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab2652eac3823..899871edc9f7b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21872,6 +21872,44 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+ case RISCVISD::VMERGE_VL: {
+ // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
+ SDValue Mask = N->getOperand(0);
+ SDValue True = N->getOperand(1);
+ SDValue Passthru = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ // Fixed vectors are wrapped in scalable containers, unwrap them.
+ using namespace SDPatternMatch;
+ SDValue SubVec;
+ if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+ Mask = SubVec;
+
+ if (!isOneOrOneSplat(Mask))
+ break;
+
+ return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getSimpleValueType(0),
+ Passthru, True, VL);
+ }
+ case RISCVISD::VMV_V_V_VL: {
+ // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
+ SDValue Passthru = N->getOperand(0);
+ SDValue Src = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // Fixed vectors are wrapped in scalable containers, unwrap them.
+ using namespace SDPatternMatch;
+ SDValue SubVec;
+ if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+ Src = SubVec;
+
+ SDValue SplatVal = DAG.getSplatValue(Src);
+ if (!SplatVal)
+ break;
+ MVT VT = N->getSimpleValueType(0);
+ return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
+ Subtarget);
+ }
case RISCVISD::VSLIDEDOWN_VL:
case RISCVISD::VSLIDEUP_VL:
if (N->getOperand(1)->isUndef())
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 6ddca4a3e0909..a5385be0c011c 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -651,11 +651,23 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!hasSameEEW(MI, *Src))
return false;
+ std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
if (SrcPassthru.getReg().isValid() &&
- SrcPassthru.getReg() != Passthru.getReg())
- return false;
+ SrcPassthru.getReg() != Passthru.getReg()) {
+ // If Src's passthru != Passthru, check if it uses Passthru in another
+ // operand and try to commute it.
+ int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
+ if (OtherIdx == -1)
+ return false;
+ unsigned OpIdx1 = OtherIdx;
+ unsigned OpIdx2 = Src->getNumExplicitDefs();
+ if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
+ return false;
+ NeedsCommute = {OpIdx1, OpIdx2};
+ }
// Src VL will have already been reduced if legal (see tryToReduceVL),
// so we don't need to handle a smaller source VL here. However, the
@@ -668,6 +680,13 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!ensureDominates(Passthru, *Src))
return false;
+ if (NeedsCommute) {
+ auto [OpIdx1, OpIdx2] = *NeedsCommute;
+ [[maybe_unused]] bool Commuted =
+ TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
+ assert(Commuted && "Failed to commute Src?");
+ }
+
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 7968c5190eb01..0bacb5c26cb4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1353,3 +1353,48 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
%v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
ret <32 x double> %v
}
+
+define <4 x i32> @splat_v4i32(i32 %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i32> poison, i32 %x, i32 0
+ %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> %splat, <4 x i32> poison, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x float> @splat_v4f32(float %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x float> poison, float %x, i32 0
+ %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer
+ %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> %splat, <4 x float> poison, i32 %evl)
+ ret <4 x float> %v
+}
+
+define <4 x i32> @splat_v4i32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4i32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> splat (i32 1), <4 x i32> poison, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x float> @splat_v4f32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4f32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 270976
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: ret
+ %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> splat (float 42.0), <4 x float> poison, i32 %evl)
+ ret <4 x float> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index c2638127e47af..698d47f3be720 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -245,3 +245,14 @@ define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i6
%b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %a, iXLen %avl)
ret <vscale x 1 x i64> %b
}
+
+define <vscale x 4 x float> @commute_vfmadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; CHECK-LABEL: commute_vfmadd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v12, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %passthru, iXLen 7, iXLen %vl, iXLen 3)
+ %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
+ ret <vscale x 4 x float> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index 95232e734bb18..68e74ff6ba05b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -168,3 +168,23 @@ body: |
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
%z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
...
+---
+name: commute_vfmadd
+body: |
+ bb.0:
+ liveins: $x8, $v0, $v8, $v9, $v10
+ ; CHECK-LABEL: name: commute_vfmadd
+ ; CHECK: liveins: $x8, $v0, $v8, $v9, $v10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %x:vr = COPY $v9
+ ; CHECK-NEXT: %y:vr = COPY $v10
+ ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMACC_VV_M1_E32 %passthru, %y, %x, 7, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit $frm
+ %avl:gprnox0 = COPY $x8
+ %passthru:vrnov0 = COPY $v8
+ %x:vr = COPY $v9
+ %y:vr = COPY $v10
+ %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, -1, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+ %vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 03697aafea45d..f92ee37051840 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1663,3 +1663,48 @@ define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double
%v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
ret <vscale x 8 x double> %v
}
+
+define <vscale x 2 x i32> @splat_nxv2i32(i32 %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> %splat, <vscale x 2 x i32> poison, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x float> @splat_nxv2f32(float %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x float> poison, float %x, i32 0
+ %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> %splat, <vscale x 2 x float> poison, i32 %evl)
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 2 x i32> @splat_nxv2i32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2i32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i32> poison, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x float> @splat_nxv2f32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2f32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 270976
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> poison, i32 %evl)
+ ret <vscale x 2 x float> %v
+}
``````````
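For reference, here is how the pieces compose on the new splat_nxv2i32 test: a sketch of the fold chain implied by the two new PerformDAGCombine cases (DAG node names as in the patch; operand lists abbreviated, and the intermediate steps are my reading of the combines above):

```llvm
; %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true),
;          <vscale x 2 x i32> %splat, <vscale x 2 x i32> poison, i32 %evl)
;
;   VMERGE_VL allones, %splat, poison, poison, %evl
;     -> VMV_V_V_VL poison, %splat, %evl   ; new all-ones-mask combine
;     -> vmv.v.x v8, a0 with vl = %evl     ; new splat-source combine,
;                                          ; via lowerScalarSplat
```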
https://github.com/llvm/llvm-project/pull/170543