[llvm] [RISCV] Fold vmv.v.v into vleNff.v (PR #143981)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 12 14:54:05 PDT 2025


https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/143981

We already fold vmerge.vvm into vleNff.v via RISCVDAGToDAGISel::performCombineVMergeAndVOps, so this teaches RISCVVectorPeephole::foldVMV_V_V to do the same.
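
As a rough illustration, at the MIR level the peephole now performs the following rewrite (a minimal sketch adapted from the vmv.v.v-peephole.mir test in this patch; the virtual register names are just illustrative):

    %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 $noreg, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */
    %passthru:vr = COPY $v8
    %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */

becomes

    %passthru:vr = COPY $v8
    %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 %passthru, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */

i.e. the vmv.v.v's passthru becomes the vleff's passthru, the vleff is moved past the passthru definition when that is safe, and the vmv.v.v is deleted.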

Stacked on #143935




>From 153f149dc0e20ec37dcec28fdf72fb9178151034 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 12 Jun 2025 18:07:17 +0100
Subject: [PATCH 1/3] [RISCV] Remove implicit $vl def on vleNff pseudos

In #90049 we removed the side effect flag from the vleNff pseudos, on the grounds that the effect of setting vl was already modelled by the vl output operand.

This goes a step further by also removing the implicit def of vl, and re-adding it in RISCVInsertVSETVLI at the point where we also emit the PseudoReadVL.

The motivation for this is to handle vleff in more places in RISCVVectorPeephole, which makes it possible to migrate the last vmerge peephole over from RISCVISelDAGToDAG.

Some of these tests claim that the vleff shouldn't be deleted when none of its results are used, but they date from the initial commit in 3b5430eb0dad5. I'm not sure whether they still hold today.

This also moves the fault-only-first predicate to RISCVInstrPredicates.td since we can't rely on the implicit vl operand anymore.
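
For example, a vle8ff currently comes out of isel looking roughly like this (a sketch based on the vleff-vlseg2ff-output.ll checks in this patch; the operand names are illustrative):

    %vd:vr, %vlout:gpr = PseudoVLE8FF_V_M1 $noreg, %base, %avl, 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl

With this patch isel emits the same instruction without the trailing implicit-def dead $vl:

    %vd:vr, %vlout:gpr = PseudoVLE8FF_V_M1 $noreg, %base, %avl, 3 /* e8 */, 2 /* tu, ma */

and RISCVInsertVSETVLI::insertReadVL adds an implicit $vl def back onto the instruction when it rewrites the vl output.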
---
 llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp            |  2 +-
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp         |  5 +++--
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp             |  5 -----
 llvm/lib/Target/RISCV/RISCVInstrInfo.h               |  2 --
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td      |  6 ------
 llvm/lib/Target/RISCV/RISCVInstrPredicates.td        |  7 +++++++
 llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll | 12 ++++++------
 llvm/test/CodeGen/RISCV/rvv/vleff.ll                 |  5 -----
 llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll     |  4 ----
 llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll     |  4 ----
 10 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 72f1596d79a02..4fb71a3ed0006 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -1101,7 +1101,7 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
   if (RISCVII::hasRoundModeOp(TSFlags))
     --NumOps;
 
-  bool hasVLOutput = RISCV::isFaultFirstLoad(*MI);
+  bool hasVLOutput = RISCVInstrInfo::isFaultOnlyFirstLoad(*MI);
   for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
     const MachineOperand &MO = MI->getOperand(OpNo);
     // Skip vl output. It should be the second output.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 53192e9dfe6c6..9a513891b765d 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1253,7 +1253,7 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
     return;
   }
 
-  if (RISCV::isFaultFirstLoad(MI)) {
+  if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
     // Update AVL to vl-output of the fault first load.
     assert(MI.getOperand(1).getReg().isVirtual());
     if (LIS) {
@@ -1756,7 +1756,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
   for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
     MachineInstr &MI = *I++;
-    if (RISCV::isFaultFirstLoad(MI)) {
+    if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
       Register VLOutput = MI.getOperand(1).getReg();
       assert(VLOutput.isVirtual());
       if (!MI.getOperand(1).isDead()) {
@@ -1774,6 +1774,7 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
       }
       // We don't use the vl output of the VLEFF/VLSEGFF anymore.
       MI.getOperand(1).setReg(RISCV::X0);
+      MI.addRegisterDefined(RISCV::VL, MRI->getTargetRegisterInfo());
     }
   }
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 86a4e8e370ee6..e5d29e1a8b476 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4568,11 +4568,6 @@ RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
   }
 }
 
-bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
-  return MI.getNumExplicitDefs() == 2 &&
-         MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
-}
-
 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
   int16_t MI1FrmOpIdx =
       RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index b099acd81e995..8260949cf918a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -357,8 +357,6 @@ bool isRVVSpill(const MachineInstr &MI);
 std::optional<std::pair<unsigned, unsigned>>
 isRVVSpillForZvlsseg(unsigned Opcode);
 
-bool isFaultFirstLoad(const MachineInstr &MI);
-
 // Return true if both input instructions have equal rounding mode. If at least
 // one of the instructions does not have rounding mode, false will be returned.
 bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 281f8d55932b9..f9fc6f0be3804 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6144,8 +6144,6 @@ defm PseudoVSUX : VPseudoIStore<Ordered=false>;
 // 7.7. Unit-stride Fault-Only-First Loads
 //===----------------------------------------------------------------------===//
 
-// vleff may update VL register
-let Defs = [VL] in
 defm PseudoVL : VPseudoFFLoad;
 
 //===----------------------------------------------------------------------===//
@@ -6159,11 +6157,7 @@ defm PseudoVSSEG : VPseudoUSSegStore;
 defm PseudoVSSSEG : VPseudoSSegStore;
 defm PseudoVSOXSEG : VPseudoISegStore<Ordered=true>;
 defm PseudoVSUXSEG : VPseudoISegStore<Ordered=false>;
-
-// vlseg<nf>e<eew>ff.v may update VL register
-let Defs = [VL] in {
 defm PseudoVLSEG : VPseudoUSSegLoadFF;
-}
 
 //===----------------------------------------------------------------------===//
 // 11. Vector Integer Arithmetic Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 4c37cb7e393bf..1057eeee31d65 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -129,6 +129,13 @@ def isVSlideInstr
                       !instances<Pseudo>("^PseudoVSLIDEUP_VI.*")
                      ])>>>;
 
+def isFaultOnlyFirstLoad
+    : TIIPredicate<"isFaultOnlyFirstLoad",
+                    MCReturnStatement<
+                      CheckOpcode<
+                       !instances<Pseudo>(
+                          "^PseudoVL(SEG[2-8])?E(8|16|32|64)FF_V.*")>>>;
+
 def isNonZeroLoadImmediate
     : TIIPredicate<"isNonZeroLoadImmediate",
                    MCReturnStatement<CheckAll<[
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
index e4235d03cda31..db31866b56372 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
@@ -14,7 +14,7 @@ define i64 @test_vleff_nxv8i8(ptr %p, i64 %vl) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
-  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1)
+  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */ :: (load unknown-size from %ir.p, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLE8FF_V_M1_1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
@@ -31,7 +31,7 @@ define i64 @test_vleff_nxv8i8_tu(<vscale x 8 x i8> %passthru, ptr %p, i64 %vl) {
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vr = COPY $v8
-  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[COPY2]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1)
+  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[COPY2]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */ :: (load unknown-size from %ir.p, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLE8FF_V_M1_1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
@@ -50,7 +50,7 @@ define i64 @test_vleff_nxv8i8_mask(<vscale x 8 x i8> %maskedoff, ptr %p, <vscale
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_MASK:%[0-9]+]]:vrnov0, [[PseudoVLE8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1)
+  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_MASK:%[0-9]+]]:vrnov0, [[PseudoVLE8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLE8FF_V_M1_MASK1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
@@ -66,7 +66,7 @@ define i64 @test_vlseg2ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
-  ; CHECK-NEXT:   [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
+  ; CHECK-NEXT:   [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */ :: (load unknown-size from %ir.base, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
@@ -83,7 +83,7 @@ define i64 @test_vlseg2ff_nxv8i8_tu(target("riscv.vector.tuple", <vscale x 8 x i
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vrn2m1 = COPY $v8_v9
-  ; CHECK-NEXT:   [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[COPY2]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
+  ; CHECK-NEXT:   [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[COPY2]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */ :: (load unknown-size from %ir.base, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
@@ -102,7 +102,7 @@ define i64 @test_vlseg2ff_nxv8i8_mask(target("riscv.vector.tuple", <vscale x 8 x
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vrn2m1nov0 = COPY $v8_v9
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVLSEG2E8FF_V_M1_MASK:%[0-9]+]]:vrn2m1nov0, [[PseudoVLSEG2E8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
+  ; CHECK-NEXT:   [[PseudoVLSEG2E8FF_V_M1_MASK:%[0-9]+]]:vrn2m1nov0, [[PseudoVLSEG2E8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 0 /* tu, mu */ :: (load unknown-size from %ir.base, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_MASK1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff.ll b/llvm/test/CodeGen/RISCV/rvv/vleff.ll
index 1f3959c1eac8e..4c989ce87290e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff.ll
@@ -3016,12 +3016,9 @@ entry:
   ret void
 }
 
-; Test with both outputs dead. Make sure the vleff isn't deleted.
 define void @intrinsic_vleff_dead_all(ptr %0, iXLen %1, ptr %2) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_dead_all:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, iXLen } @llvm.riscv.vleff.nxv1f64(
@@ -3034,8 +3031,6 @@ entry:
 define void @intrinsic_vleff_mask_dead_all(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vleff_mask_dead_all:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, iXLen } @llvm.riscv.vleff.mask.nxv1f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
index d8bff08ea5513..333ba83f69eef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
@@ -66,8 +66,6 @@ entry:
 define void @test_vlseg2ff_dead_all(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2ff_dead_all:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16ff.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   tail call {target("riscv.vector.tuple", <vscale x 32 x i8>, 2), i32} @llvm.riscv.vlseg2ff.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
@@ -77,8 +75,6 @@ entry:
 define void @test_vlseg2ff_mask_dead_all(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %val, ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2ff_mask_dead_all:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vlseg2e16ff.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   tail call {target("riscv.vector.tuple", <vscale x 32 x i8>, 2), i32} @llvm.riscv.vlseg2ff.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %val, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
index 05a5be295cc71..b9e45cc190a65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
@@ -66,8 +66,6 @@ entry:
 define void @test_vlseg2ff_dead_all(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2ff_dead_all:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16ff.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   tail call {target("riscv.vector.tuple", <vscale x 32 x i8>, 2), i64} @llvm.riscv.vlseg2ff.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
@@ -77,8 +75,6 @@ entry:
 define void @test_vlseg2ff_mask_dead_all(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %val, ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2ff_mask_dead_all:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vlseg2e16ff.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   tail call {target("riscv.vector.tuple", <vscale x 32 x i8>, 2), i64} @llvm.riscv.vlseg2ff.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %val, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)

>From ec16b44699e1db56154c73ab10a63767669d10b3 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 12 Jun 2025 22:46:11 +0100
Subject: [PATCH 2/3] Precommit tests

---
 .../test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll | 18 ++++++++++++++++++
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.mir     | 18 ++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 6345b90db23b8..9c8a57eff0f03 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -206,3 +206,21 @@ define <vscale x 1 x i64> @undef_passthru(<vscale x 1 x i64> %passthru, <vscale
   %b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %a, iXLen %avl)
   ret <vscale x 1 x i64> %b
 }
+
+; Check that we can fold into vle64ff.v even if we need to move it past the
+; passthru and it's safe.
+define <vscale x 1 x i64> @vleff_move_past_passthru(ptr %p, ptr %q, iXLen %avl) {
+; CHECK-LABEL: vleff_move_past_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v9, (a0)
+; CHECK-NEXT:    vl1re64.v v8, (a1)
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call { <vscale x 1 x i64>, iXLen } @llvm.riscv.vleff(<vscale x 1 x i64> poison, ptr %p, iXLen %avl)
+  %vec = extractvalue { <vscale x 1 x i64>, iXLen } %a, 0
+  %passthru = load <vscale x 1 x i64>, ptr %q
+  %b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %vec, iXLen %avl)
+  ret <vscale x 1 x i64> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index f545ecc5e53d7..ee6bde29e2597 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -135,3 +135,21 @@ body: |
     %3:vrnov0 = PseudoVMV_V_V_MF2 $noreg, %2, 0, 5 /* e32 */, 0 /* tu, mu */
     %7:vmv0 = COPY $v8
     %6:vrnov0 = PseudoVLSE32_V_MF2_MASK %3, $noreg, $noreg, %7, 0, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 4)
+...
+---
+name: move_vleff
+body: |
+  bb.0:
+    liveins: $v8
+    ; CHECK-LABEL: name: move_vleff
+    ; CHECK: liveins: $v8
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 $noreg, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
+    ; CHECK-NEXT: %passthru:vr = COPY $v8
+    ; CHECK-NEXT: %y:gpr = ADDI $x0, 1
+    ; CHECK-NEXT: %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
+    %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 $noreg, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size)
+    %passthru:vr = COPY $v8
+    %y:gpr = ADDI $x0, 1
+    %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
+...

>From 0b238eeead78b062fe44dacb921c0a120cda2112 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 12 Jun 2025 22:51:19 +0100
Subject: [PATCH 3/3] [RISCV] Fold vmv.v.v into vleNff.v

We already fold vmerge.vvm into vleNff.v via RISCVDAGToDAGISel::performCombineVMergeAndVOps, so this teaches RISCVVectorPeephole::foldVMV_V_V to do the same.

Stacked on #143935
---
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp    | 7 ++++---
 llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll  | 4 +---
 llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 6bb026378274e..c9c2413d009b7 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -611,7 +611,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
 
   MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
   if (!Src || Src->hasUnmodeledSideEffects() ||
-      Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
+      Src->getParent() != MI.getParent() ||
       !RISCVII::isFirstDefTiedToFirstUse(Src->getDesc()) ||
       !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
       !RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags))
@@ -622,7 +622,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
     return false;
 
   // Src needs to have the same passthru as VMV_V_V
-  MachineOperand &SrcPassthru = Src->getOperand(1);
+  MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
   if (SrcPassthru.getReg() != RISCV::NoRegister &&
       SrcPassthru.getReg() != Passthru.getReg())
     return false;
@@ -643,7 +643,8 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
     // If Src is masked then its passthru needs to be in VRNoV0.
     if (Passthru.getReg() != RISCV::NoRegister)
       MRI->constrainRegClass(Passthru.getReg(),
-                             TII->getRegClass(Src->getDesc(), 1, TRI,
+                             TII->getRegClass(Src->getDesc(),
+                                              SrcPassthru.getOperandNo(), TRI,
                                               *Src->getParent()->getParent()));
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 9c8a57eff0f03..1e2e7795f6546 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -212,11 +212,9 @@ define <vscale x 1 x i64> @undef_passthru(<vscale x 1 x i64> %passthru, <vscale
 define <vscale x 1 x i64> @vleff_move_past_passthru(ptr %p, ptr %q, iXLen %avl) {
 ; CHECK-LABEL: vleff_move_past_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64ff.v v9, (a0)
 ; CHECK-NEXT:    vl1re64.v v8, (a1)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 1 x i64>, iXLen } @llvm.riscv.vleff(<vscale x 1 x i64> poison, ptr %p, iXLen %avl)
   %vec = extractvalue { <vscale x 1 x i64>, iXLen } %a, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index ee6bde29e2597..6e106e50634f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -144,10 +144,9 @@ body: |
     ; CHECK-LABEL: name: move_vleff
     ; CHECK: liveins: $v8
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 $noreg, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
     ; CHECK-NEXT: %passthru:vr = COPY $v8
+    ; CHECK-NEXT: %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 %passthru, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
     ; CHECK-NEXT: %y:gpr = ADDI $x0, 1
-    ; CHECK-NEXT: %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
     %x:vr, %vl:gpr = PseudoVLE32FF_V_M1 $noreg, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size)
     %passthru:vr = COPY $v8
     %y:gpr = ADDI $x0, 1


