[llvm] [RISCV] Lower VP_SELECT constant false to use vmerge.vxm/vmerge.vim (PR #144461)

Liao Chunyu via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 16 19:52:28 PDT 2025


https://github.com/ChunyuLiao created https://github.com/llvm/llvm-project/pull/144461

Currently, when the false operand of a vp_select is a splat vector, it is lowered to a vmv_v_x/vmv_v_i. MachineLICM hoists the vmv out of the loop, but a whole-register copy of the splat is left behind in the loop body.

By inverting the mask register and swapping the true and false operands of the vp_select, the splat folds into a vmerge.vxm/vmerge.vim, eliminating the copy inside the loop.

Current codegen: https://godbolt.org/z/EnGMn3xeM
Expected (similar form): https://godbolt.org/z/nWhGM6Ej5
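
For illustration, the rewrite conceptually turns vp.select(m, t, splat c) into the equivalent vp.select(not m, splat c, t), moving the splat into the operand position that vmerge can take as a scalar or immediate. Below is a rough before/after sketch based on the select_nxv2i64_constant_false test added in this patch; the "before" side is an approximation of today's lowering, and exact register allocation and scheduling may differ:

Before (splat materialized with vmv.v.x; in a loop the vmv is hoisted, but a whole-register copy of it stays in the body):
    li a1, 100
    vsetvli zero, a0, e64, m2, ta, ma
    vmv.v.x v10, a1
    vmerge.vvm v8, v10, v8, v0

After (mask inverted, splat folded into the merge):
    vsetvli zero, a0, e64, m2, ta, ma
    vmnot.m v0, v0
    li a0, 100
    vmerge.vxm v8, v8, a0, v0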

From c1d7c1b8b6d582f0306507eb41e99695c6c11ab2 Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Mon, 16 Jun 2025 21:40:22 -0400
Subject: [PATCH] [RISCV] Lower VP_SELECT constant false to use
 vmerge.vxm/vmerge.vim

Currently, when the false operand of a vp_select is a splat vector,
it is lowered to a vmv_v_x/vmv_v_i. MachineLICM hoists the vmv out
of the loop, but a whole-register copy of it is left in the loop body.

By inverting the mask register and swapping the true and false values
in the vp_select, we can eliminate some instructions inside the loop.

Current codegen: https://godbolt.org/z/EnGMn3xeM
Expected (similar form): https://godbolt.org/z/nWhGM6Ej5
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 27 ++++++++++++++++++-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |  1 +
 .../test/CodeGen/RISCV/rvv/masked-load-int.ll |  6 ++---
 llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll     | 26 +++++++++++++++++--
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7cfada6c0601c..ab36d0aeffa99 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8170,11 +8170,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerRESET_FPENV(Op, DAG);
   case ISD::EH_DWARF_CFA:
     return lowerEH_DWARF_CFA(Op, DAG);
+  case ISD::VP_SELECT:
+    if (SDValue Op2 = Op.getOperand(2);
+        Op2.hasOneUse() && (Op2.getOpcode() == ISD::SPLAT_VECTOR ||
+                            Op2.getOpcode() == ISD::SPLAT_VECTOR_PARTS))
+      return lowerVPSelectConstantFalse(Op, DAG);
+    return lowerVPOp(Op, DAG);
   case ISD::VP_MERGE:
     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
       return lowerVPMergeMask(Op, DAG);
     [[fallthrough]];
-  case ISD::VP_SELECT:
   case ISD::VP_ADD:
   case ISD::VP_SUB:
   case ISD::VP_MUL:
@@ -13176,6 +13181,26 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
   return convertFromScalableVector(VT, Result, DAG, Subtarget);
 }
 
+// Lower a VP_SELECT whose false operand is a splat constant by inverting the
+// mask and swapping the true/false operands, so that the splat folds into the
+// vmerge as a vmerge.vxm/vmerge.vim rather than a separate vmv.v.x/vmv.v.i.
+SDValue
+RISCVTargetLowering::lowerVPSelectConstantFalse(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  SDValue TrueVal = Op.getOperand(1);
+  SDValue FalseVal = Op.getOperand(2);
+  SDValue VL = Op.getOperand(3);
+
+  MVT MaskVT = VT.changeVectorElementType(MVT::i1);
+  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+  SDValue NewMask = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT,
+                                Op.getOperand(0), AllOneMask, VL);
+  return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, NewMask, FalseVal, TrueVal,
+                     DAG.getUNDEF(VT), VL);
+}
+
 SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDLoc DL(Op);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 417d684a62382..cf04e56f36288 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -477,6 +477,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
 
   SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVPSelectConstantFalse(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 75537406f3515..a9ed70b94c90f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -34,10 +34,10 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
 ; ZVE32:       # %bb.0:
 ; ZVE32-NEXT:    csrr a1, vlenb
 ; ZVE32-NEXT:    srli a1, a1, 3
-; ZVE32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; ZVE32-NEXT:    vmv.v.i v8, 0
-; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; ZVE32-NEXT:    vle8.v v8, (a0), v0.t
+; ZVE32-NEXT:    vmnot.m v0, v0
+; ZVE32-NEXT:    vmerge.vim v8, v8, 0, v0
 ; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
   ret <vscale x 1 x i8> %load
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 371ec7c790dda..3918a8009fde8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -470,6 +470,28 @@ define <vscale x 2 x i64> @select_nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i6
   ret <vscale x 2 x i64> %v
 }
 
+define <vscale x 2 x i64> @select_nxv2i64_constant_true(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv2i64_constant_true:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> splat (i64 -1), <vscale x 2 x i64> %b, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @select_nxv2i64_constant_false(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv2i64_constant_false:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vmnot.m v0, v0
+; CHECK-NEXT:    li a0, 100
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat (i64 100), i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
 declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
 
 define <vscale x 4 x i64> @select_nxv4i64(<vscale x 4 x i1> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, i32 zeroext %evl) {
@@ -702,10 +724,10 @@ define <vscale x 16 x double> @select_nxv16f64(<vscale x 16 x i1> %a, <vscale x
 ; CHECK-NEXT:    and a4, a5, a4
 ; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
 ; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT:    bltu a2, a1, .LBB48_2
+; CHECK-NEXT:    bltu a2, a1, .LBB50_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:  .LBB48_2:
+; CHECK-NEXT:  .LBB50_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload


