[llvm] [RISCV] Rewrite vrgather.vx undef, (vmv.s.x), 0, v0 as vmv.v.x (PR #136010)

Thu Apr 17 10:03:42 PDT 2025

https://github.com/preames updated https://github.com/llvm/llvm-project/pull/136010

>From 0c43a14165be1a02df211f7e4539c17fc948a4ad Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 11 Apr 2025 07:56:31 -0700
Subject: [PATCH 1/3] [RISCV] Rewrite vrgather.vx undef, (vmv.s.x), 0, v0 as
 vmv.v.x

This extends the DAG combine introduced in 336b2909 to handle the case
where the prior value is defined by a vmv.s.x instead of a vmv.v.x.
If the vrgather splats the single source element, and has no passthru
we can replace it with a vmv.v.x - which will in turn usually get
folded into a vmerge if a select follows.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 32 +++++++++++++++++--
 .../RISCV/rvv/fixed-vectors-shuffle-fp.ll     | 19 ++++-------
 .../RISCV/rvv/fixed-vectors-shuffle-int.ll    | 17 ++++------
 3 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f24752b8721f5..4dd237b5415dd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5573,7 +5573,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     const int Lane = SVN->getSplatIndex();
     if (Lane >= 0) {
       MVT SVT = VT.getVectorElementType();
-
       // Turn splatted vector load into a strided load with an X0 stride.
       SDValue V = V1;
       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
@@ -19710,20 +19709,47 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return V;
     break;
   case RISCVISD::VRGATHER_VX_VL: {
-    // Drop a redundant vrgather_vx.
+    using namespace llvm::SDPatternMatch;
     // Note this assumes that out of bounds indices produce poison
     // and can thus be replaced without having to prove them inbounds..
+    EVT VT = N->getValueType(0);
     SDValue Src = N->getOperand(0);
+    SDValue Idx = N->getOperand(1);
     SDValue Passthru = N->getOperand(2);
     SDValue VL = N->getOperand(4);
+
+    // Warning: Unlike most cases we strip an insert_subvector, this one
+    // does not require the first operand to be undef.
+    if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        sd_match(Src.getOperand(2), m_Zero()))
+      Src = Src.getOperand(1);
+
     switch (Src.getOpcode()) {
     default:
       break;
     case RISCVISD::VMV_V_X_VL:
     case RISCVISD::VFMV_V_F_VL:
-      if (Passthru.isUndef() && VL == Src.getOperand(2))
+      // Drop a redundant vrgather_vx.
+      // TODO: Remove the type restriction if we find a motivating
+      // test case?
+      if (Passthru.isUndef() && VL == Src.getOperand(2) &&
+          Src.getValueType() == VT)
         return Src;
       break;
+    case RISCVISD::VMV_S_X_VL:
+    case RISCVISD::VFMV_S_F_VL:
+      // If this use only demands lane zero from the source vmv.s.x, and
+      // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
+      // a vmv.v.x.  Note that there can be other uses of the original
+      // vmv.s.x and thus we can't eliminate it.  (vfmv.s.f is analogous)
+      if (sd_match(Idx, m_Zero()) && Passthru.isUndef() &&
+          VL == Src.getOperand(2)) {
+        unsigned Opc =
+            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
+        return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
+                           VL);
+      }
+      break;
     }
     break;
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
index 5aac2687122ae..f580b1b993395 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
@@ -96,13 +96,11 @@ define <8 x float> @vmerge_vxm(<8 x float> %v, float %s) {
 ; CHECK-LABEL: vmerge_vxm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 25
-; CHECK-NEXT:    vsetivli zero, 8, e32, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m4, tu, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vmv2r.v v10, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vrgather.vi v10, v8, 0, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
   %ins = insertelement <8 x float> %v, float %s, i32 0
   %shuf = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
@@ -112,15 +110,10 @@ define <8 x float> @vmerge_vxm(<8 x float> %v, float %s) {
 define <8 x float> @vmerge_vxm2(<8 x float> %v, float %s) {
 ; CHECK-LABEL: vmerge_vxm2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m4, tu, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    li a0, 25
-; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vmv1r.v v10, v12
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vrgather.vi v8, v10, 0, v0.t
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
   %ins = insertelement <8 x float> %v, float %s, i32 0
   %shuf = shufflevector <8 x float> %v, <8 x float> %ins, <8 x i32> <i32 8, i32 1, i32 2, i32 8, i32 8, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
index 5c4ef29d7d5b7..8676803e20e3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
@@ -1448,13 +1448,11 @@ define <8 x i8> @vmerge_vxm(<8 x i8> %v, i8 %s) {
 ; CHECK-LABEL: vmerge_vxm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 25
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vrgather.vi v9, v8, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-NEXT:    ret
   %ins = insertelement <8 x i8> %v, i8 %s, i32 0
   %shuf = shufflevector <8 x i8> %ins, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
@@ -1465,12 +1463,9 @@ define <8 x i8> @vmerge_vxm2(<8 x i8> %v, i8 %s) {
 ; CHECK-LABEL: vmerge_vxm2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 25
-; CHECK-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vrgather.vi v8, v9, 0, v0.t
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-NEXT:    ret
   %ins = insertelement <8 x i8> %v, i8 %s, i32 0
   %shuf = shufflevector <8 x i8> %v, <8 x i8> %ins, <8 x i32> <i32 8, i32 1, i32 2, i32 8, i32 8, i32 5, i32 6, i32 7>

>From 33c68698d96f25912cd53d4398c3b5f6923800b1 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 17 Apr 2025 09:27:45 -0700
Subject: [PATCH 2/3] Address review comment

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4dd237b5415dd..8862a6e985870 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19721,7 +19721,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     // Warning: Unlike most cases we strip an insert_subvector, this one
     // does not require the first operand to be undef.
     if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
-        sd_match(Src.getOperand(2), m_Zero()))
+        isNullConstant(Src.getOperand(2)))
       Src = Src.getOperand(1);
 
     switch (Src.getOpcode()) {
@@ -19742,7 +19742,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
       // a vmv.v.x.  Note that there can be other uses of the original
       // vmv.s.x and thus we can't eliminate it.  (vfmv.s.f is analogous)
-      if (sd_match(Idx, m_Zero()) && Passthru.isUndef() &&
+      if (isNullConstant(Idx) && Passthru.isUndef() &&
           VL == Src.getOperand(2)) {
         unsigned Opc =
             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;

>From 9f4fc5a3e0287fec0e2155de47dc03b59b99ad3b Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 17 Apr 2025 10:02:16 -0700
Subject: [PATCH 3/3] Address review comment

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8862a6e985870..4b1f5cb00cdb0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19709,7 +19709,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return V;
     break;
   case RISCVISD::VRGATHER_VX_VL: {
-    using namespace llvm::SDPatternMatch;
     // Note this assumes that out of bounds indices produce poison
     // and can thus be replaced without having to prove them inbounds..
     EVT VT = N->getValueType(0);