[llvm] d10079e - [RISCV] Reduce the VL of both operands in VMERGE_VVM (#144759)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 18 14:23:54 PDT 2025


Author: Min-Yih Hsu
Date: 2025-06-18T14:23:51-07:00
New Revision: d10079e305acae58b44dc773cb94f7127de197ef

URL: https://github.com/llvm/llvm-project/commit/d10079e305acae58b44dc773cb94f7127de197ef
DIFF: https://github.com/llvm/llvm-project/commit/d10079e305acae58b44dc773cb94f7127de197ef.diff

LOG: [RISCV] Reduce the VL of both operands in VMERGE_VVM (#144759)

The `tryToReduceVL` function in RISCVVectorPeephole currently only
reduces the VL of the instruction that defines the true operand in
VMERGE_VVM. We should be able to reduce VL of both operands. This patch
generalizes this function to support multiple operands from a single
instruction.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
    llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
    llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index c9c2413d009b7..f7acd676461fb 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -112,7 +112,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   //
   // TODO: We can handle a bunch more instructions here, and probably
   // recurse backwards through operands too.
-  unsigned SrcIdx = 0;
+  SmallVector<unsigned, 2> SrcIndices = {0};
   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
   default:
     return false;
@@ -122,10 +122,10 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   case RISCV::VSE64_V:
     break;
   case RISCV::VMV_V_V:
-    SrcIdx = 2;
+    SrcIndices[0] = 2;
     break;
   case RISCV::VMERGE_VVM:
-    SrcIdx = 3; // TODO: We can also handle the false operand.
+    SrcIndices.assign({2, 3});
     break;
   case RISCV::VREDSUM_VS:
   case RISCV::VREDMAXU_VS:
@@ -143,7 +143,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   case RISCV::VFREDMIN_VS:
   case RISCV::VFWREDUSUM_VS:
   case RISCV::VFWREDOSUM_VS:
-    SrcIdx = 2;
+    SrcIndices[0] = 2;
     break;
   }
 
@@ -151,42 +151,48 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
     return false;
 
-  Register SrcReg = MI.getOperand(SrcIdx).getReg();
-  // Note: one *use*, not one *user*.
-  if (!MRI->hasOneUse(SrcReg))
-    return false;
-
-  MachineInstr *Src = MRI->getVRegDef(SrcReg);
-  if (!Src || Src->hasUnmodeledSideEffects() ||
-      Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
-      !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
-      !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
-    return false;
-
-  // Src's dest needs to have the same EEW as MI's input.
-  if (!hasSameEEW(MI, *Src))
-    return false;
-
-  bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
-      TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
-  if (ElementsDependOnVL || Src->mayRaiseFPException())
-    return false;
+  bool Changed = false;
+  for (unsigned SrcIdx : SrcIndices) {
+    Register SrcReg = MI.getOperand(SrcIdx).getReg();
+    // Note: one *use*, not one *user*.
+    if (!MRI->hasOneUse(SrcReg))
+      continue;
+
+    MachineInstr *Src = MRI->getVRegDef(SrcReg);
+    if (!Src || Src->hasUnmodeledSideEffects() ||
+        Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
+        !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
+        !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
+      continue;
+
+    // Src's dest needs to have the same EEW as MI's input.
+    if (!hasSameEEW(MI, *Src))
+      continue;
+
+    bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
+        TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
+    if (ElementsDependOnVL || Src->mayRaiseFPException())
+      continue;
+
+    MachineOperand &SrcVL =
+        Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
+    if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
+      continue;
 
-  MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
-  if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
-    return false;
+    if (!ensureDominates(VL, *Src))
+      continue;
 
-  if (!ensureDominates(VL, *Src))
-    return false;
+    if (VL.isImm())
+      SrcVL.ChangeToImmediate(VL.getImm());
+    else if (VL.isReg())
+      SrcVL.ChangeToRegister(VL.getReg(), false);
 
-  if (VL.isImm())
-    SrcVL.ChangeToImmediate(VL.getImm());
-  else if (VL.isReg())
-    SrcVL.ChangeToRegister(VL.getReg(), false);
+    Changed = true;
+  }
 
   // TODO: For instructions with a passthru, we could clear the passthru
   // and tail policy since we've just proven the tail is not demanded.
-  return true;
+  return Changed;
 }
 
 /// Check if an operand is an immediate or a materialized ADDI $x0, imm.

diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 75537406f3515..372b07e0137b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -34,9 +34,8 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
 ; ZVE32:       # %bb.0:
 ; ZVE32-NEXT:    csrr a1, vlenb
 ; ZVE32-NEXT:    srli a1, a1, 3
-; ZVE32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; ZVE32-NEXT:    vmv.v.i v8, 0
 ; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; ZVE32-NEXT:    vmv.v.i v8, 0
 ; ZVE32-NEXT:    vle8.v v8, (a0), v0.t
 ; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 10a92f0188a93..1cbb980aebffc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3063,9 +3063,9 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 define <vscale x 1 x i8> @vmv_v_v(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
 ; NOVLOPT-LABEL: vmv_v_v:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetvli a1, zero, e8, mf8, tu, ma
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
 ; NOVLOPT-NEXT:    vmv.v.v v8, v9
-; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; NOVLOPT-NEXT:    vmerge.vvm v8, v8, v10, v0
 ; NOVLOPT-NEXT:    ret
 ;


        


More information about the llvm-commits mailing list