[llvm] [RISCV] Move performCombineVMergeAndVOps into RISCVFoldMasks (PR #71764)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 8 20:45:27 PST 2023


https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/71764

From d51f43edf9cdf68330af9df9fed7d96445cc4f8a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 31 Oct 2023 14:15:34 +0000
Subject: [PATCH 1/4] [RISCV] Move performCombineVMergeAndVOps into
 RISCVFoldMasks

Continuing on from 72e6c1c70d5e07bbc8cb7cae2ed915108daf93aa, this moves the
vmerge fold peephole into RISCVFoldMasks.

Instead of reasoning about chain and glue nodes, this sinks the True operand
down to just before the vmerge pseudo so that it can reuse the vmerge's
VL/mask/false/passthru operands. Most of the time we can do this without any
hitches because we check that the vmerge is the only user of True. However,
in certain cases there are write-after-read dependencies on physical
registers that need to be sunk as well, e.g. fcsr in @vmerge_vfcvt_rm in
test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll.

We also need to implement the masked -> unmasked peephole, since the vmerge
fold peephole can produce new masked pseudos with all-ones masks that should
then be unmasked. However, that peephole is still kept in RISCVISelDAGToDAG
to minimize the diff for this patch; it will be switched over in a later
patch.
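
To illustrate the write-after-read case, here is a rough, hypothetical
MIR-style sketch (pseudo names and operand lists abbreviated; not copied
from the actual test):

  %true = PseudoVFCVT_X_F_V ..., %vl, %sew   ; implicitly reads $frm
  WriteFRMImm 1                              ; writes $frm
  %x = PseudoVMERGE_VVM %pt, %pt, %true, %mask, %vl, %sew

Sinking %true on its own would move its $frm read past the write, so the
intervening write has to be sunk along with it, preserving their relative
order.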
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp      | 416 +++++++++++++++++-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   | 305 -------------
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 -
 .../RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll |   2 +-
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     |  18 +-
 5 files changed, 402 insertions(+), 341 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index d1c77a6cc7756df..197e4d04feee752 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -9,15 +9,29 @@
 // This pass performs various peephole optimisations that fold masks into vector
 // pseudo instructions after instruction selection.
 //
-// Currently it converts
-// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// It performs the following transforms:
+//
+// %true = PseudoFOO %passthru, ..., %vl, %sew
+// %x = PseudoVMERGE_VVM %passthru, %passthru, %true, %mask, %vl, %sew
+// ->
+// %x = PseudoFOO_MASK %passthru, ..., %mask, %vl, %sew
+//
+// %x = PseudoFOO_MASK ..., %allonesmask, %vl, %sew
 // ->
-// PseudoVMV_V_V %false, %true, %vl, %sew
+// %x = PseudoFOO ..., %vl, %sew
+//
+// %x = PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// ->
+// %x = PseudoVMV_V_V %false, %true, %vl, %sew
 //
 //===---------------------------------------------------------------------===//
 
 #include "RISCV.h"
+#include "RISCVISelDAGToDAG.h"
+#include "RISCVInstrInfo.h"
 #include "RISCVSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -48,9 +62,12 @@ class RISCVFoldMasks : public MachineFunctionPass {
   StringRef getPassName() const override { return "RISC-V Fold Masks"; }
 
 private:
+  bool foldVMergeIntoOps(MachineInstr &MI, MachineInstr *MaskDef);
   bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
+  bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
 
   bool isAllOnesMask(MachineInstr *MaskCopy);
+  bool isOpSameAs(const MachineOperand &LHS, const MachineOperand &RHS);
 };
 
 } // namespace
@@ -59,22 +76,24 @@ char RISCVFoldMasks::ID = 0;
 
 INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
 
-bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
-  if (!MaskCopy)
-    return false;
-  assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0);
-  Register SrcReg =
-      TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI);
-  if (!SrcReg.isVirtual())
-    return false;
-  MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
-  if (!SrcDef)
+bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
+  if (!MaskDef)
     return false;
+  if (MaskDef->isCopy()) {
+    assert(MaskDef->getOperand(0).getReg() == RISCV::V0);
+    Register SrcReg =
+        TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+    if (!SrcReg.isVirtual())
+      return false;
+    MaskDef = MRI->getVRegDef(SrcReg);
+    if (!MaskDef)
+      return false;
+  }
 
   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
   // assume that it's all-ones? Same applies to its VL.
-  switch (SrcDef->getOpcode()) {
+  switch (MaskDef->getOpcode()) {
   case RISCV::PseudoVMSET_M_B1:
   case RISCV::PseudoVMSET_M_B2:
   case RISCV::PseudoVMSET_M_B4:
@@ -88,6 +107,313 @@ bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
   }
 }
 
+static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
+  switch (LMUL) {
+  case RISCVII::LMUL_F8:
+    return RISCV::PseudoVMSET_M_B1;
+  case RISCVII::LMUL_F4:
+    return RISCV::PseudoVMSET_M_B2;
+  case RISCVII::LMUL_F2:
+    return RISCV::PseudoVMSET_M_B4;
+  case RISCVII::LMUL_1:
+    return RISCV::PseudoVMSET_M_B8;
+  case RISCVII::LMUL_2:
+    return RISCV::PseudoVMSET_M_B16;
+  case RISCVII::LMUL_4:
+    return RISCV::PseudoVMSET_M_B32;
+  case RISCVII::LMUL_8:
+    return RISCV::PseudoVMSET_M_B64;
+  case RISCVII::LMUL_RESERVED:
+    llvm_unreachable("Unexpected LMUL");
+  }
+  llvm_unreachable("Unknown VLMUL enum");
+}
+
+/// Inserts an operand at Idx in MI, pushing back any operands.
+static void insertOperand(MachineInstr &MI, MachineOperand MO, unsigned Idx) {
+  SmallVector<MachineOperand> OpsToAddBack;
+  unsigned NumTailOps = MI.getNumOperands() - Idx;
+  for (unsigned I = 0; I < NumTailOps; I++) {
+    OpsToAddBack.push_back(MI.getOperand(Idx));
+    MI.removeOperand(Idx);
+  }
+  MI.addOperand(MO);
+  for (MachineOperand &TailOp : OpsToAddBack)
+    MI.addOperand(TailOp);
+}
+
+// Try to sink From to before To, also sinking any instructions between From and
+// To where there is a write-after-read dependency on a physical register.
+static bool sinkInstructionAndDeps(MachineInstr &From, MachineInstr &To) {
+  assert(From.getParent() == To.getParent());
+  SmallVector<MachineInstr *> Worklist, ToSink;
+  Worklist.push_back(&From);
+
+  // Rather than compute whether or not we saw a store for every instruction,
+  // just compute it once even if it's more conservative.
+  bool SawStore = false;
+  for (MachineBasicBlock::instr_iterator II = From.getIterator();
+       II != To.getIterator(); II++) {
+    if (II->mayStore()) {
+      SawStore = true;
+      break;
+    }
+  }
+
+  while (!Worklist.empty()) {
+    MachineInstr *MI = Worklist.pop_back_val();
+
+    if (!MI->isSafeToMove(nullptr, SawStore))
+      return false;
+
+    SmallSet<Register, 8> Defs, Uses;
+    for (MachineOperand &Def : MI->all_defs())
+      Defs.insert(Def.getReg());
+    for (MachineOperand &Use : MI->all_uses())
+      Uses.insert(Use.getReg());
+
+    // If anything from [MI, To] uses a definition of MI, we can't sink it.
+    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
+         II != To.getIterator(); II++) {
+      for (MachineOperand &Use : II->all_uses()) {
+        if (Defs.contains(Use.getReg()))
+          return false;
+      }
+    }
+
+    // If MI uses any physical registers, we need to sink any instructions after
+    // it where there might be a write-after-read dependency.
+    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
+         II != To.getIterator(); II++) {
+      bool NeedsSink = any_of(II->all_defs(), [&Uses](MachineOperand &Def) {
+        return Def.getReg().isPhysical() && Uses.contains(Def.getReg());
+      });
+      if (NeedsSink)
+        Worklist.push_back(&*II);
+    }
+
+    ToSink.push_back(MI);
+  }
+
+  for (MachineInstr *MI : ToSink)
+    MI->moveBefore(&To);
+  return true;
+}
+
+// Returns true if LHS is the same register as RHS, or if LHS is undefined.
+bool RISCVFoldMasks::isOpSameAs(const MachineOperand &LHS,
+                                const MachineOperand &RHS) {
+  if (LHS.getReg() == RISCV::NoRegister)
+    return true;
+  if (RHS.getReg() == RISCV::NoRegister)
+    return false;
+  return TRI->lookThruCopyLike(LHS.getReg(), MRI) ==
+         TRI->lookThruCopyLike(RHS.getReg(), MRI);
+}
+
+// Try to fold away VMERGE_VVM instructions. We handle these cases:
+// -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
+//  masked TU instruction. VMERGE_VVM must have its merge operand the same as
+//  its false operand.
+// -Masked TA VMERGE_VVM combined with an unmasked TA instruction folds to a
+//  masked TA instruction.
+// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to a
+//  masked TU instruction. Both instructions must have the same merge operand.
+//  VMERGE_VVM must have its merge operand the same as its false operand.
+// Note: The VMERGE_VVM forms above (TA and TU) refer to the policy implied,
+// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo
+// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
+// form.
+bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
+                                       MachineInstr *MaskDef) {
+  MachineOperand *True;
+  MachineOperand *Merge;
+  MachineOperand *False;
+
+  const unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
+  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
+  if (BaseOpc == RISCV::VMV_V_V) {
+    Merge = &MI.getOperand(1);
+    False = &MI.getOperand(1);
+    True = &MI.getOperand(2);
+  } else if (BaseOpc == RISCV::VMERGE_VVM) {
+    Merge = &MI.getOperand(1);
+    False = &MI.getOperand(2);
+    True = &MI.getOperand(3);
+  } else
+    return false;
+
+  MachineInstr &TrueMI = *MRI->getVRegDef(True->getReg());
+
+  // We require that either merge and false are the same, or that merge
+  // is undefined.
+  if (!isOpSameAs(*Merge, *False))
+    return false;
+
+  // MI must be the only user of True.
+  if (!MRI->hasOneUse(True->getReg()))
+    return false;
+
+  unsigned TrueOpc = TrueMI.getOpcode();
+  const MCInstrDesc &TrueMCID = TrueMI.getDesc();
+  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
+
+  bool IsMasked = false;
+  const RISCV::RISCVMaskedPseudoInfo *Info =
+      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
+  if (!Info && HasTiedDest) {
+    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
+    IsMasked = true;
+  }
+
+  if (!Info)
+    return false;
+
+  // When Mask is not a true mask, this transformation is illegal for some
+  // operations whose results are affected by mask, like viota.m.
+  if (Info->MaskAffectsResult && BaseOpc == RISCV::VMERGE_VVM &&
+      !isAllOnesMask(MaskDef))
+    return false;
+
+  MachineOperand &TrueMergeOp = TrueMI.getOperand(1);
+  if (HasTiedDest && TrueMergeOp.getReg() != RISCV::NoRegister) {
+    // The vmerge instruction must be TU.
+    // FIXME: This could be relaxed, but we need to handle the policy for the
+    // resulting op correctly.
+    if (Merge->getReg() == RISCV::NoRegister)
+      return false;
+    // Both the vmerge instruction and the True instruction must have the same
+    // merge operand.
+    if (!isOpSameAs(TrueMergeOp, *False))
+      return false;
+  }
+
+  if (IsMasked) {
+    assert(HasTiedDest && "Expected tied dest");
+    // The vmerge instruction must be TU.
+    if (Merge->getReg() == RISCV::NoRegister)
+      return false;
+    // The vmerge instruction must have an all 1s mask since we're going to keep
+    // the mask from the True instruction.
+    // FIXME: Support mask agnostic True instruction which would have an
+    // undef merge operand.
+    if (BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef))
+      return false;
+  }
+
+  // Skip if True has side effect.
+  // TODO: Support vleff and vlsegff.
+  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
+    return false;
+
+  // Note: getVLOpNum accounts for a vector policy operand if present.
+  const MachineOperand &TrueVL =
+      TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID));
+
+  auto GetMinVL =
+      [](const MachineOperand &LHS,
+         const MachineOperand &RHS) -> std::optional<MachineOperand> {
+    if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
+        LHS.getReg() == RHS.getReg())
+      return LHS;
+    if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
+      return RHS;
+    if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
+      return LHS;
+    if (!LHS.isImm() || !RHS.isImm())
+      return std::nullopt;
+    return LHS.getImm() <= RHS.getImm() ? LHS : RHS;
+  };
+
+  // Because MI and True must have the same merge operand (or True's operand is
+  // implicit_def), the "effective" body is the minimum of their VLs.
+  const MachineOperand VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+  auto MinVL = GetMinVL(TrueVL, VL);
+  if (!MinVL)
+    return false;
+  bool VLChanged = !MinVL->isIdenticalTo(VL);
+
+  // If we end up changing the VL or mask of True, then we need to make sure it
+  // doesn't raise any observable fp exceptions, since changing the active
+  // elements will affect how fflags is set.
+  if (VLChanged || !IsMasked)
+    if (TrueMCID.mayRaiseFPException() &&
+        !TrueMI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+      return false;
+
+  unsigned MaskedOpc = Info->MaskedPseudo;
+  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
+#ifndef NDEBUG
+  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
+         "Expected instructions with mask have policy operand.");
+  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
+                                         MCOI::TIED_TO) == 0 &&
+         "Expected instructions with mask have a tied dest.");
+#endif
+
+  // Sink True down to MI so that it can access MI's operands.
+  if (!sinkInstructionAndDeps(TrueMI, MI))
+    return false;
+
+  // Set True's merge operand to the vmerge's false operand.
+  TrueMI.getOperand(1).setReg(False->getReg());
+
+  // If we're converting it to a masked pseudo, reuse MI's mask.
+  if (!IsMasked) {
+    if (BaseOpc == RISCV::VMV_V_V) {
+      // If MI is a vmv.v.v, it won't have a mask operand. So insert an all-ones
+      // mask just before True.
+      unsigned VMSetOpc =
+          getVMSetForLMul(RISCVII::getLMul(MI.getDesc().TSFlags));
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc))
+          .addDef(RISCV::V0)
+          .add(VL)
+          .add(TrueMI.getOperand(RISCVII::getSEWOpNum(TrueMCID)));
+    }
+
+    TrueMI.setDesc(MaskedMCID);
+
+    // TODO: Increment MaskOpIdx by number of explicit defs in tablegen?
+    unsigned MaskOpIdx = Info->MaskOpIdx + TrueMI.getNumExplicitDefs();
+    insertOperand(TrueMI, MachineOperand::CreateReg(RISCV::V0, false),
+                  MaskOpIdx);
+  }
+
+  // Update the AVL.
+  if (MinVL->isReg())
+    TrueMI.getOperand(RISCVII::getVLOpNum(MaskedMCID))
+        .ChangeToRegister(MinVL->getReg(), false);
+  else
+    TrueMI.getOperand(RISCVII::getVLOpNum(MaskedMCID))
+        .ChangeToImmediate(MinVL->getImm());
+
+  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
+  // operand is undefined.
+  //
+  // However, if the VL became smaller than what the vmerge had originally, then
+  // elements past VL that were previously in the vmerge's body will have moved
+  // to the tail. In that case we always need to use tail undisturbed to
+  // preserve them.
+  uint64_t Policy = (Merge->getReg() == RISCV::NoRegister && !VLChanged)
+                        ? RISCVII::TAIL_AGNOSTIC
+                        : RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+  TrueMI.getOperand(RISCVII::getVecPolicyOpNum(MaskedMCID)).setImm(Policy);
+
+  const TargetRegisterClass *V0RC =
+      TII->getRegClass(MaskedMCID, 0, TRI, *MI.getMF());
+
+  // The destination and passthru can no longer be in V0.
+  MRI->constrainRegClass(TrueMI.getOperand(0).getReg(), V0RC);
+  Register PassthruReg = TrueMI.getOperand(1).getReg();
+  if (PassthruReg != RISCV::NoRegister)
+    MRI->constrainRegClass(PassthruReg, V0RC);
+
+  MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg());
+  MI.eraseFromParent();
+
+  return true;
+}
+
 // Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
 // (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
 bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
@@ -98,7 +424,7 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
   unsigned NewOpc;
   switch (MI.getOpcode()) {
   default:
-    llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
+    return false;
     CASE_VMERGE_TO_VMV(MF8)
     CASE_VMERGE_TO_VMV(MF4)
     CASE_VMERGE_TO_VMV(MF2)
@@ -108,11 +434,8 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
     CASE_VMERGE_TO_VMV(M8)
   }
 
-  Register MergeReg = MI.getOperand(1).getReg();
-  Register FalseReg = MI.getOperand(2).getReg();
   // Check merge == false (or merge == undef)
-  if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) !=
-                                           TRI->lookThruCopyLike(FalseReg, MRI))
+  if (!isOpSameAs(MI.getOperand(1), MI.getOperand(2)))
     return false;
 
   assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
@@ -133,6 +456,48 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
   return true;
 }
 
+bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI,
+                                       MachineInstr *MaskDef) {
+  const RISCV::RISCVMaskedPseudoInfo *I =
+      RISCV::getMaskedPseudoInfo(MI.getOpcode());
+  if (!I)
+    return false;
+
+  // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
+  unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
+  if (!isAllOnesMask(MaskDef))
+    return false;
+
+  // There are two classes of pseudos in the table - compares and
+  // everything else.  See the comment on RISCVMaskedPseudo for details.
+  const unsigned Opc = I->UnmaskedPseudo;
+  const MCInstrDesc &MCID = TII->get(Opc);
+  const bool HasPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
+#ifndef NDEBUG
+  const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode());
+  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
+             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
+         "Masked and unmasked pseudos are inconsistent");
+  assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure");
+#endif
+
+  MI.setDesc(MCID);
+  MI.removeOperand(MaskOpIdx);
+
+  // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
+  // so try and relax it to vr.
+  MRI->recomputeRegClass(MI.getOperand(0).getReg());
+  unsigned PassthruOpIdx = MI.getNumExplicitDefs();
+  if (HasPassthru) {
+    if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+      MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
+  } else
+    MI.removeOperand(PassthruOpIdx);
+
+  return true;
+}
+
 bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -158,11 +523,16 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
   MachineInstr *CurrentV0Def;
   for (MachineBasicBlock &MBB : MF) {
     CurrentV0Def = nullptr;
-    for (MachineInstr &MI : MBB) {
-      unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
-      if (BaseOpc == RISCV::VMERGE_VVM)
-        Changed |= convertVMergeToVMv(MI, CurrentV0Def);
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
+      Changed |= foldVMergeIntoOps(MI, CurrentV0Def);
+      if (MI.definesRegister(RISCV::V0, TRI))
+        CurrentV0Def = &MI;
+    }
 
+    CurrentV0Def = nullptr;
+    for (MachineInstr &MI : MBB) {
+      Changed |= convertVMergeToVMv(MI, CurrentV0Def);
+      Changed |= convertToUnmasked(MI, CurrentV0Def);
       if (MI.definesRegister(RISCV::V0, TRI))
         CurrentV0Def = &MI;
     }
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 920657a198d9b6b..f1b96ffd37b27a8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -156,8 +156,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
 
   CurDAG->setRoot(Dummy.getValue());
 
-  MadeChange |= doPeepholeMergeVVMFold();
-
   // After we're done with everything else, convert IMPLICIT_DEF
   // passthru operands to NoRegister.  This is required to workaround
   // an optimization deficiency in MachineCSE.  This really should
@@ -3420,309 +3418,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
   return true;
 }
 
-static bool IsVMerge(SDNode *N) {
-  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
-}
-
-static bool IsVMv(SDNode *N) {
-  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
-}
-
-static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
-  switch (LMUL) {
-  case RISCVII::LMUL_F8:
-    return RISCV::PseudoVMSET_M_B1;
-  case RISCVII::LMUL_F4:
-    return RISCV::PseudoVMSET_M_B2;
-  case RISCVII::LMUL_F2:
-    return RISCV::PseudoVMSET_M_B4;
-  case RISCVII::LMUL_1:
-    return RISCV::PseudoVMSET_M_B8;
-  case RISCVII::LMUL_2:
-    return RISCV::PseudoVMSET_M_B16;
-  case RISCVII::LMUL_4:
-    return RISCV::PseudoVMSET_M_B32;
-  case RISCVII::LMUL_8:
-    return RISCV::PseudoVMSET_M_B64;
-  case RISCVII::LMUL_RESERVED:
-    llvm_unreachable("Unexpected LMUL");
-  }
-  llvm_unreachable("Unknown VLMUL enum");
-}
-
-// Try to fold away VMERGE_VVM instructions. We handle these cases:
-// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
-//  folds to a masked TU instruction. VMERGE_VVM must have have merge operand
-//  same as false operand.
-// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a
-//  masked TA instruction.
-// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to
-//  masked TU instruction. Both instructions must have the same merge operand.
-//  VMERGE_VVM must have have merge operand same as false operand.
-// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied,
-// not the pseudo name.  That is, a TA VMERGE_VVM can be either the _TU pseudo
-// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
-// form.
-bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
-  SDValue Merge, False, True, VL, Mask, Glue;
-  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
-  if (IsVMv(N)) {
-    Merge = N->getOperand(0);
-    False = N->getOperand(0);
-    True = N->getOperand(1);
-    VL = N->getOperand(2);
-    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
-    // mask later below.
-  } else {
-    assert(IsVMerge(N));
-    Merge = N->getOperand(0);
-    False = N->getOperand(1);
-    True = N->getOperand(2);
-    Mask = N->getOperand(3);
-    VL = N->getOperand(4);
-    // We always have a glue node for the mask at v0.
-    Glue = N->getOperand(N->getNumOperands() - 1);
-  }
-  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
-  assert(!Glue || Glue.getValueType() == MVT::Glue);
-
-  // We require that either merge and false are the same, or that merge
-  // is undefined.
-  if (Merge != False && !isImplicitDef(Merge))
-    return false;
-
-  assert(True.getResNo() == 0 &&
-         "Expect True is the first output of an instruction.");
-
-  // Need N is the exactly one using True.
-  if (!True.hasOneUse())
-    return false;
-
-  if (!True.isMachineOpcode())
-    return false;
-
-  unsigned TrueOpc = True.getMachineOpcode();
-  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
-  uint64_t TrueTSFlags = TrueMCID.TSFlags;
-  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
-
-  bool IsMasked = false;
-  const RISCV::RISCVMaskedPseudoInfo *Info =
-      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
-  if (!Info && HasTiedDest) {
-    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
-    IsMasked = true;
-  }
-
-  if (!Info)
-    return false;
-
-  // When Mask is not a true mask, this transformation is illegal for some
-  // operations whose results are affected by mask, like viota.m.
-  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
-    return false;
-
-  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
-    // The vmerge instruction must be TU.
-    // FIXME: This could be relaxed, but we need to handle the policy for the
-    // resulting op correctly.
-    if (isImplicitDef(Merge))
-      return false;
-    SDValue MergeOpTrue = True->getOperand(0);
-    // Both the vmerge instruction and the True instruction must have the same
-    // merge operand.
-    if (False != MergeOpTrue)
-      return false;
-  }
-
-  if (IsMasked) {
-    assert(HasTiedDest && "Expected tied dest");
-    // The vmerge instruction must be TU.
-    if (isImplicitDef(Merge))
-      return false;
-    // The vmerge instruction must have an all 1s mask since we're going to keep
-    // the mask from the True instruction.
-    // FIXME: Support mask agnostic True instruction which would have an
-    // undef merge operand.
-    if (Mask && !usesAllOnesMask(Mask, Glue))
-      return false;
-  }
-
-  // Skip if True has side effect.
-  // TODO: Support vleff and vlsegff.
-  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
-    return false;
-
-  // The last operand of a masked instruction may be glued.
-  bool HasGlueOp = True->getGluedNode() != nullptr;
-
-  // The chain operand may exist either before the glued operands or in the last
-  // position.
-  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
-  bool HasChainOp =
-      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
-
-  if (HasChainOp) {
-    // Avoid creating cycles in the DAG. We must ensure that none of the other
-    // operands depend on True through it's Chain.
-    SmallVector<const SDNode *, 4> LoopWorklist;
-    SmallPtrSet<const SDNode *, 16> Visited;
-    LoopWorklist.push_back(False.getNode());
-    if (Mask)
-      LoopWorklist.push_back(Mask.getNode());
-    LoopWorklist.push_back(VL.getNode());
-    if (Glue)
-      LoopWorklist.push_back(Glue.getNode());
-    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
-      return false;
-  }
-
-  // The vector policy operand may be present for masked intrinsics
-  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
-  unsigned TrueVLIndex =
-      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
-  SDValue TrueVL = True.getOperand(TrueVLIndex);
-  SDValue SEW = True.getOperand(TrueVLIndex + 1);
-
-  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
-    if (LHS == RHS)
-      return LHS;
-    if (isAllOnesConstant(LHS))
-      return RHS;
-    if (isAllOnesConstant(RHS))
-      return LHS;
-    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
-    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
-    if (!CLHS || !CRHS)
-      return SDValue();
-    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
-  };
-
-  // Because N and True must have the same merge operand (or True's operand is
-  // implicit_def), the "effective" body is the minimum of their VLs.
-  SDValue OrigVL = VL;
-  VL = GetMinVL(TrueVL, VL);
-  if (!VL)
-    return false;
-
-  // If we end up changing the VL or mask of True, then we need to make sure it
-  // doesn't raise any observable fp exceptions, since changing the active
-  // elements will affect how fflags is set.
-  if (TrueVL != VL || !IsMasked)
-    if (mayRaiseFPException(True.getNode()) &&
-        !True->getFlags().hasNoFPExcept())
-      return false;
-
-  SDLoc DL(N);
-
-  // From the preconditions we checked above, we know the mask and thus glue
-  // for the result node will be taken from True.
-  if (IsMasked) {
-    Mask = True->getOperand(Info->MaskOpIdx);
-    Glue = True->getOperand(True->getNumOperands() - 1);
-    assert(Glue.getValueType() == MVT::Glue);
-  }
-  // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create
-  // an all-ones mask to use.
-  else if (IsVMv(N)) {
-    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
-    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
-    ElementCount EC = N->getValueType(0).getVectorElementCount();
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
-
-    SDValue AllOnesMask =
-        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
-    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
-                                            RISCV::V0, AllOnesMask, SDValue());
-    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
-    Glue = MaskCopy.getValue(1);
-  }
-
-  unsigned MaskedOpc = Info->MaskedPseudo;
-#ifndef NDEBUG
-  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
-  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
-         "Expected instructions with mask have policy operand.");
-  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
-                                         MCOI::TIED_TO) == 0 &&
-         "Expected instructions with mask have a tied dest.");
-#endif
-
-  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
-  // operand is undefined.
-  //
-  // However, if the VL became smaller than what the vmerge had originally, then
-  // elements past VL that were previously in the vmerge's body will have moved
-  // to the tail. In that case we always need to use tail undisturbed to
-  // preserve them.
-  bool MergeVLShrunk = VL != OrigVL;
-  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
-                        ? RISCVII::TAIL_AGNOSTIC
-                        : /*TUMU*/ 0;
-  SDValue PolicyOp =
-    CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
-
-
-  SmallVector<SDValue, 8> Ops;
-  Ops.push_back(False);
-
-  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
-  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
-  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
-  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
-
-  Ops.push_back(Mask);
-
-  // For unmasked "VOp" with rounding mode operand, that is interfaces like
-  // (..., rm, vl) or (..., rm, vl, policy).
-  // Its masked version is (..., vm, rm, vl, policy).
-  // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
-  if (HasRoundingMode)
-    Ops.push_back(True->getOperand(TrueVLIndex - 1));
-
-  Ops.append({VL, SEW, PolicyOp});
-
-  // Result node should have chain operand of True.
-  if (HasChainOp)
-    Ops.push_back(True.getOperand(TrueChainOpIdx));
-
-  // Add the glue for the CopyToReg of mask->v0.
-  Ops.push_back(Glue);
-
-  MachineSDNode *Result =
-      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
-  Result->setFlags(True->getFlags());
-
-  if (!cast<MachineSDNode>(True)->memoperands_empty())
-    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
-
-  // Replace vmerge.vvm node by Result.
-  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
-
-  // Replace another value of True. E.g. chain and VL.
-  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
-    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
-
-  // Try to transform Result to unmasked intrinsic.
-  doPeepholeMaskedRVV(Result);
-  return true;
-}
-
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
-  bool MadeChange = false;
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
-
-    if (IsVMerge(N) || IsVMv(N))
-      MadeChange |= performCombineVMergeAndVOps(N);
-  }
-  return MadeChange;
-}
-
 /// If our passthru is an implicit_def, use noreg instead.  This side
 /// steps issues with MachineCSE not being able to CSE expressions with
 /// IMPLICIT_DEF operands while preserving the semantic intent. See
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 77e174135a599f9..f7551fe4d29a6be 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -188,9 +188,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
 private:
   bool doPeepholeSExtW(SDNode *Node);
   bool doPeepholeMaskedRVV(MachineSDNode *Node);
-  bool doPeepholeMergeVVMFold();
   bool doPeepholeNoRegPassThru();
-  bool performCombineVMergeAndVOps(SDNode *N);
 };
 
 namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
index d612298bf50e737..4f88806054dfbce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=riscv-fold-masks | FileCheck %s
 
 declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
 declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index c639f092444fc43..5c8b02237382fdf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -952,22 +952,20 @@ define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale
 define void @test_dag_loop() {
 ; CHECK-LABEL: test_dag_loop:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (zero)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vsetivli zero, 0, e8, m4, tu, mu
-; CHECK-NEXT:    vmv4r.v v20, v16
-; CHECK-NEXT:    vssubu.vx v20, v16, zero, v0.t
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vssubu.vx v12, v8, zero, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmseq.vv v0, v20, v16
+; CHECK-NEXT:    vmseq.vv v0, v12, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vsetivli zero, 1, e16, m8, tu, ma
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e16, m8, tu, mu
+; CHECK-NEXT:    vle16.v v8, (zero), v0.t
 ; CHECK-NEXT:    vsetivli zero, 0, e16, m8, ta, ma
-; CHECK-NEXT:    vse16.v v16, (zero)
+; CHECK-NEXT:    vse16.v v8, (zero)
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* null, i64 1)

From 0caf80eaec023425a7ad71ccb27dac38c29a695b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 9 Nov 2023 12:30:29 +0800
Subject: [PATCH 2/4] Keep a more canonical form by copying masks from a
 virtual register first
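
A hypothetical MIR sketch of the difference (operand lists abbreviated).
Previously the all-ones mask defined $v0 directly:

  $v0 = PseudoVMSET_M_B8 %vl, %sew

Now it goes through a virtual register first:

  %mask:vr = PseudoVMSET_M_B8 %vl, %sew
  $v0 = COPY %mask

This keeps V0 defined by a COPY from a virtual register, which is the form
isAllOnesMask expects to look through.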

---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 25 ++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 197e4d04feee752..07ef72567194676 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -79,16 +79,13 @@ INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
 bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
   if (!MaskDef)
     return false;
-  if (MaskDef->isCopy()) {
-    assert(MaskDef->getOperand(0).getReg() == RISCV::V0);
-    Register SrcReg =
-        TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
-    if (!SrcReg.isVirtual())
-      return false;
-    MaskDef = MRI->getVRegDef(SrcReg);
-    if (!MaskDef)
-      return false;
-  }
+  assert(MaskDef->isCopy() && MaskDef->getOperand(0).getReg() == RISCV::V0);
+  Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+  if (!SrcReg.isVirtual())
+    return false;
+  MaskDef = MRI->getVRegDef(SrcReg);
+  if (!MaskDef)
+    return false;
 
   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
@@ -365,10 +362,14 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
       // mask just before True.
       unsigned VMSetOpc =
           getVMSetForLMul(RISCVII::getLMul(MI.getDesc().TSFlags));
-      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc))
-          .addDef(RISCV::V0)
+      Register Dest = MRI->createVirtualRegister(&RISCV::VRRegClass);
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc),
+              Dest)
           .add(VL)
           .add(TrueMI.getOperand(RISCVII::getSEWOpNum(TrueMCID)));
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(RISCV::COPY),
+              RISCV::V0)
+          .addReg(Dest);
     }
 
     TrueMI.setDesc(MaskedMCID);

From 889a20c1673500d5494fb4639478a365f31975b2 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 9 Nov 2023 12:32:20 +0800
Subject: [PATCH 3/4] Remove unnecessary includes

---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 07ef72567194676..99cf80b00b03185 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -28,14 +28,8 @@
 
 #include "RISCV.h"
 #include "RISCVISelDAGToDAG.h"
-#include "RISCVInstrInfo.h"
 #include "RISCVSubtarget.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
 
 using namespace llvm;
 

From 26fa8062017aeb7e6f497d3994fb75b3cc1f911c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 9 Nov 2023 12:39:03 +0800
Subject: [PATCH 4/4] Use MachineInstr::insert
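
For reference, this replaces the hand-rolled insertOperand helper with the
existing MachineInstr API, whose declaration (paraphrased from
MachineInstr.h; check the in-tree signature) is roughly:

  /// Inserts Ops BEFORE It. Can untie/retie tied operands.
  void insert(mop_iterator InsertBefore, ArrayRef<MachineOperand> Ops);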

---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 99cf80b00b03185..ce9059626486b1e 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -120,19 +120,6 @@ static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
   llvm_unreachable("Unknown VLMUL enum");
 }
 
-/// Inserts an operand at Idx in MI, pushing back any operands.
-static void insertOperand(MachineInstr &MI, MachineOperand MO, unsigned Idx) {
-  SmallVector<MachineOperand> OpsToAddBack;
-  unsigned NumTailOps = MI.getNumOperands() - Idx;
-  for (unsigned I = 0; I < NumTailOps; I++) {
-    OpsToAddBack.push_back(MI.getOperand(Idx));
-    MI.removeOperand(Idx);
-  }
-  MI.addOperand(MO);
-  for (MachineOperand &TailOp : OpsToAddBack)
-    MI.addOperand(TailOp);
-}
-
 // Try to sink From to before To, also sinking any instructions between From and
 // To where there is a write-after-read dependency on a physical register.
 static bool sinkInstructionAndDeps(MachineInstr &From, MachineInstr &To) {
@@ -370,8 +357,8 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
 
     // TODO: Increment MaskOpIdx by number of explicit defs in tablegen?
     unsigned MaskOpIdx = Info->MaskOpIdx + TrueMI.getNumExplicitDefs();
-    insertOperand(TrueMI, MachineOperand::CreateReg(RISCV::V0, false),
-                  MaskOpIdx);
+    TrueMI.insert(&TrueMI.getOperand(MaskOpIdx),
+                  MachineOperand::CreateReg(RISCV::V0, false));
   }
 
   // Update the AVL.


