[llvm] [RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (PR #88295)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 10 09:48:39 PDT 2024


llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)


This patch splits off part of the work from #70549 to move vsetvli insertion to post regalloc.

doLocalPostpass operates outside of RISCVInsertVSETVLI's dataflow, so we can move it into its own pass. We can then run that pass after vector register allocation, which should be a smaller change since it only touches GPR registers.

A couple of things that are different from #70549:

- This manually fixes up the LiveIntervals rather than recomputing them via createAndComputeVirtRegInterval. I'm not sure there's much of a difference either way (a sketch of the recompute approach follows this list).

- For the postpass it's sufficient to just check isUndef() in hasUndefinedMergeOp, i.e. we don't need to look up the def in VNInfo.
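
For reference, a minimal, hypothetical sketch of the recompute-based alternative mentioned in the first bullet, reusing the `LIS` and `DefReg` names from the patch below; this is not part of this change:

```cpp
// Hypothetical alternative (not in this patch): rather than manually adding a
// segment and shrinking to uses, drop the stale interval for DefReg and let
// LiveIntervals rebuild it from the current def/use positions.
if (DefReg.isVirtual()) {
  LIS.removeInterval(DefReg);                  // discard the out-of-date interval
  LIS.createAndComputeVirtRegInterval(DefReg); // recompute it from scratch
}
```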

Running on llvm-test-suite and SPEC CPU 2017, there aren't any changes in the number of vsetvlis removed.

We could also potentially turn off this pass for unoptimised builds.
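
As a hypothetical illustration of that last point (not part of this patch), turning the pass off for unoptimised builds could amount to simply not scheduling it in the fast register-allocation path; the function names below match the RISCVTargetMachine.cpp hunk in the diff:

```cpp
// Hypothetical sketch: leave the coalescing pass out of the fast regalloc
// pipeline so unoptimised builds keep every inserted vsetvli.
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
  if (EnableSplitRegAlloc)
    addPass(createRVVRegAllocPass(false));
  // No createRISCVCoalesceVSETVLIPass() here, unlike the optimised path.
  return TargetPassConfig::addRegAssignAndRewriteFast();
}
```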


---

Patch is 292.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88295.diff


34 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCV.h (+3) 
- (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+157-70) 
- (modified) llvm/lib/Target/RISCV/RISCVTargetMachine.cpp (+3) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll (+16-16) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+331-306) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll (+6-6) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll (+3-3) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+14-14) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll (+28-28) 
- (modified) llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll (+3-3) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll (+6-6) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+112-64) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfeq.ll (+30-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfge.ll (+30-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfgt.ll (+30-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfle.ll (+30-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmflt.ll (+30-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfne.ll (+30-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmseq.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsge.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsgt.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsle.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsleu.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmslt.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsltu.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsne.ll (+44-62) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir (+8-8) 
- (modified) llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir (+1-1) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 7af543f018ccbd..d405395dcf9ec4 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
 FunctionPass *createRISCVInsertVSETVLIPass();
 void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
+FunctionPass *createRISCVCoalesceVSETVLIPass();
+void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
+
 FunctionPass *createRISCVPostRAExpandPseudoPass();
 void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
 FunctionPass *createRISCVInsertReadWriteCSRPass();
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a14f9a28354737..b774cc37ad79a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -28,15 +28,17 @@
 #include "RISCVSubtarget.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include <queue>
 using namespace llvm;
 
 #define DEBUG_TYPE "riscv-insert-vsetvli"
 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
+#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
 
 STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
-STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
+STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
 
 static cl::opt<bool> DisableInsertVSETVLPHIOpt(
     "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -190,6 +192,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
   if (UseMO.getReg() == RISCV::NoRegister)
     return true;
 
+  if (UseMO.isUndef())
+    return true;
+  if (UseMO.getReg().isPhysical())
+    return false;
+
   if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
     if (UseMI->isImplicitDef())
       return true;
@@ -777,11 +784,32 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
                              VSETVLIInfo &Info) const;
   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
   void emitVSETVLIs(MachineBasicBlock &MBB);
-  void doLocalPostpass(MachineBasicBlock &MBB);
   void doPRE(MachineBasicBlock &MBB);
   void insertReadVL(MachineBasicBlock &MBB);
 };
 
+class RISCVCoalesceVSETVLI : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addRequired<SlotIndexes>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveStacks>();
+
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
+};
+
 } // end anonymous namespace
 
 char RISCVInsertVSETVLI::ID = 0;
@@ -789,6 +817,11 @@ char RISCVInsertVSETVLI::ID = 0;
 INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                 false, false)
 
+char RISCVCoalesceVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
+                RISCV_COALESCE_VSETVLI_NAME, false, false)
+
 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
 // VSETIVLI instruction.
 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1510,7 +1543,10 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
 
     auto &AVL = MI.getOperand(1);
     auto &PrevAVL = PrevMI.getOperand(1);
-    assert(MRI.isSSA());
+    assert(!AVL.isReg() || !AVL.getReg().isVirtual() ||
+           MRI.hasOneDef(AVL.getReg()));
+    assert(!PrevAVL.isReg() || !PrevAVL.getReg().isVirtual() ||
+           MRI.hasOneDef(PrevAVL.getReg()));
 
     // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
     // For now just check that PrevMI uses the same virtual register.
@@ -1530,64 +1566,6 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }
 
-void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
-  MachineInstr *NextMI = nullptr;
-  // We can have arbitrary code in successors, so VL and VTYPE
-  // must be considered demanded.
-  DemandedFields Used;
-  Used.demandVL();
-  Used.demandVTYPE();
-  SmallVector<MachineInstr*> ToDelete;
-  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
-
-    if (!isVectorConfigInstr(MI)) {
-      doUnion(Used, getDemanded(MI, MRI, ST));
-      continue;
-    }
-
-    Register VRegDef = MI.getOperand(0).getReg();
-    if (VRegDef != RISCV::X0 &&
-        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
-      Used.demandVL();
-
-    if (NextMI) {
-      if (!Used.usedVL() && !Used.usedVTYPE()) {
-        ToDelete.push_back(&MI);
-        // Leave NextMI unchanged
-        continue;
-      } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
-        if (!isVLPreservingConfig(*NextMI)) {
-          MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
-          MI.getOperand(0).setIsDead(false);
-          Register OldVLReg;
-          if (MI.getOperand(1).isReg())
-            OldVLReg = MI.getOperand(1).getReg();
-          if (NextMI->getOperand(1).isImm())
-            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
-          else
-            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
-          if (OldVLReg) {
-            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
-            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
-                MRI->use_nodbg_empty(OldVLReg))
-              VLOpDef->eraseFromParent();
-          }
-          MI.setDesc(NextMI->getDesc());
-        }
-        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
-        ToDelete.push_back(NextMI);
-        // fallthrough
-      }
-    }
-    NextMI = &MI;
-    Used = getDemanded(MI, MRI, ST);
-  }
-
-  NumRemovedVSETVL += ToDelete.size();
-  for (auto *MI : ToDelete)
-    MI->eraseFromParent();
-}
-
 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
   for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
     MachineInstr &MI = *I++;
@@ -1660,15 +1638,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF)
     emitVSETVLIs(MBB);
 
-  // Now that all vsetvlis are explicit, go through and do block local
-  // DSE and peephole based demanded fields based transforms.  Note that
-  // this *must* be done outside the main dataflow so long as we allow
-  // any cross block analysis within the dataflow.  We can't have both
-  // demanded fields based mutation and non-local analysis in the
-  // dataflow at the same time without introducing inconsistencies.
-  for (MachineBasicBlock &MBB : MF)
-    doLocalPostpass(MBB);
-
   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
   // of VLEFF/VLSEGFF.
   for (MachineBasicBlock &MBB : MF)
@@ -1682,3 +1651,121 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
   return new RISCVInsertVSETVLI();
 }
+
+// Now that all vsetvlis are explicit, go through and do block local
+// DSE and peephole based demanded fields based transforms.  Note that
+// this *must* be done outside the main dataflow so long as we allow
+// any cross block analysis within the dataflow.  We can't have both
+// demanded fields based mutation and non-local analysis in the
+// dataflow at the same time without introducing inconsistencies.
+bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  auto *ST = &MF.getSubtarget<RISCVSubtarget>();
+  if (!ST->hasVInstructions())
+    return false;
+
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+  bool Changed = false;
+
+  const auto *TII = ST->getInstrInfo();
+  auto *MRI = &MF.getRegInfo();
+  for (MachineBasicBlock &MBB : MF) {
+    MachineInstr *NextMI = nullptr;
+    // We can have arbitrary code in successors, so VL and VTYPE
+    // must be considered demanded.
+    DemandedFields Used;
+    Used.demandVL();
+    Used.demandVTYPE();
+    SmallVector<MachineInstr *> ToDelete;
+    for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+
+      if (!isVectorConfigInstr(MI)) {
+        doUnion(Used, getDemanded(MI, MRI, ST));
+        continue;
+      }
+
+      Register VRegDef = MI.getOperand(0).getReg();
+      if (VRegDef != RISCV::X0 &&
+          !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+        Used.demandVL();
+
+      if (NextMI) {
+        if (!Used.usedVL() && !Used.usedVTYPE()) {
+          ToDelete.push_back(&MI);
+          // Leave NextMI unchanged
+          continue;
+        } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
+          if (!isVLPreservingConfig(*NextMI)) {
+
+            Register DefReg = NextMI->getOperand(0).getReg();
+
+            MI.getOperand(0).setReg(DefReg);
+            MI.getOperand(0).setIsDead(false);
+
+            // The def of DefReg moved to MI, so extend the LiveInterval up to
+            // it.
+            if (DefReg.isVirtual()) {
+              LiveInterval &DefLI = LIS.getInterval(DefReg);
+              SlotIndex MISlot = LIS.getInstructionIndex(MI).getRegSlot();
+              VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
+              LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
+              DefLI.addSegment(S);
+              DefVNI->def = MISlot;
+
+              // DefReg may have had no uses, in which case we need to shrink
+              // the LiveInterval up to MI.
+              LIS.shrinkToUses(&DefLI);
+            }
+
+            Register OldVLReg;
+            if (MI.getOperand(1).isReg())
+              OldVLReg = MI.getOperand(1).getReg();
+            if (NextMI->getOperand(1).isImm())
+              MI.getOperand(1).ChangeToImmediate(
+                  NextMI->getOperand(1).getImm());
+            else
+              MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
+                                                false);
+
+            // Clear NextMI's AVL early so we're not counting it as a use.
+            if (NextMI->getOperand(1).isReg())
+              NextMI->getOperand(1).setReg(RISCV::NoRegister);
+
+            if (OldVLReg) {
+              MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
+              if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
+                  MRI->use_nodbg_empty(OldVLReg)) {
+                VLOpDef->eraseFromParent();
+              }
+
+              // NextMI no longer uses OldVLReg so shrink its LiveInterval.
+              if (OldVLReg.isVirtual())
+                LIS.shrinkToUses(&LIS.getInterval(OldVLReg));
+            }
+
+            MI.setDesc(NextMI->getDesc());
+          }
+          MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
+          ToDelete.push_back(NextMI);
+          // fallthrough
+        }
+      }
+      NextMI = &MI;
+      Used = getDemanded(MI, MRI, ST);
+    }
+
+    Changed |= !ToDelete.empty();
+    NumCoalescedVSETVL += ToDelete.size();
+    for (auto *MI : ToDelete) {
+      LIS.RemoveMachineInstrFromMaps(*MI);
+      MI->eraseFromParent();
+    }
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
+  return new RISCVCoalesceVSETVLI();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index ae1a6f179a49e3..2a56ce03d38a93 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -121,6 +121,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVFoldMasksPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVCoalesceVSETVLIPass(*PR);
   initializeRISCVInsertReadWriteCSRPass(*PR);
   initializeRISCVInsertWriteVXRMPass(*PR);
   initializeRISCVDAGToDAGISelPass(*PR);
@@ -394,6 +395,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
 bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   if (EnableSplitRegAlloc)
     addPass(createRVVRegAllocPass(false));
+  addPass(createRISCVCoalesceVSETVLIPass());
   return TargetPassConfig::addRegAssignAndRewriteFast();
 }
 
@@ -402,6 +404,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
     addPass(createRVVRegAllocPass(true));
     addPass(createVirtRegRewriter(false));
   }
+  addPass(createRISCVCoalesceVSETVLIPass());
   return TargetPassConfig::addRegAssignAndRewriteOptimized();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 8e214e40547832..9e83efd3519539 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
 ; CHECK-NEXT:    vfmv.v.f v8, fa4
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vslidedown.vi v8, v9, 4, v0.t
 ; CHECK-NEXT:    ret
   %v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
 ; CHECK-NEXT:    vfmv.v.f v8, fa4
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vslidedown.vi v8, v9, 4, v0.t
 ; CHECK-NEXT:    ret
   %v0 = insertelement <8 x double> poison, double %e0, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index 6bfd0ac932672f..ed152e64a91ef4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
 ; RV32-V512-NEXT:    vid.v v10
 ; RV32-V512-NEXT:    vsrl.vi v11, v10, 1
 ; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT:    vmv.v.i v0, 10
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT:    vmv.v.i v0, 10
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
 ; RV32-V512-NEXT:    vmv.v.v v8, v10
 ; RV32-V512-NEXT:    ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
 ; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
 ; RV64-V512-NEXT:    vid.v v10
 ; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
-; RV64-V512-NEXT:    vmv.v.i v0, 10
 ; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
+; RV64-V512-NEXT:    vmv.v.i v0, 10
 ; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
 ; RV64-V512-NEXT:    vmv.v.v v8, v10
 ; RV64-V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index 85b849045e8cee..a8e4af2d7368e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    fmin.d fa5, fa5, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    vmv.v.i v0, 15
 ; RV32-NEXT:    vslide1down.vx v9, v9, a0
+; RV32-NEXT:    vmv.v.i v0, 15
 ; RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vse8.v v9, (a1)
 ; RV32-NEXT:    addi sp, s0, -128
@@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    fmin.d fa5, fa5, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    vmv.v.i v0, 15
 ; RV64-NEXT:    vslide1down.vx v9, v9, a0
+; RV64-NEXT:    vmv.v.i v0, 15
 ; RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; RV64-NEXT:    vse8.v v9, (a1)
 ; RV64-NEXT:    addi sp, s0, -128
@@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa5, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT:    vmv.v.i v0, 15
 ; RV32-NEXT:    vslide1down.vx v9, v9, a0
+; RV32-NEXT:    vmv.v.i v0, 15
 ; RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vse8.v v9, (a1)
 ; RV32-NEXT:    addi sp, s0, -128
@@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa5, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT:    vmv.v.i v0, 15
 ; RV64-NEXT:    vslide1down.vx v9, v9, a0
+; RV64-NEXT:    vmv.v.i v0, 15
 ; RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; RV64-NEXT:    vse8.v v9, (a1)
 ; RV64-NEXT:    addi sp, s0, -128
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 6da83644413bc2..40ff8b50d99d8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; RV32-V512-NEXT:    vid.v v10
 ; RV32-V512-NEXT:    vsrl.vi v11, v10, 1
 ; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT:    vmv.v.i v0, 10
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT:    vmv.v.i v0, 10
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
 ; RV32-V512-NEXT:    vmv.v.v v8, v10
 ; RV32-V512-NEXT:    ret
@@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
 ; RV64-V512-NEXT:    vid.v v10
 ; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
-; RV64-V512-NEXT:    vmv.v.i v0, 10
 ; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
+; RV64-V512-NEXT:    vmv.v.i v0, 10
 ; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
 ; RV64-V512-NEXT:    vmv.v.v v8, v10
 ; RV64-V512-NEXT:    ret
@@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
 ; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; V128-NEXT:    vid.v v8
 ; V128-NEXT:    vsrl.vi v8, v8, 1
-; V128-NEXT:    vmv.v.i v0, 10
 ; V128-NEXT:    vadd.vi v8, v8, 1
+; V128-NEXT:    vmv.v.i v0, 10
 ; V128-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; V128-NEXT:    vmv.v.v v8, v10
 ; V128-NEXT:    ret
@@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
 ; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
 ; V512-NEXT:    vid.v v8
 ; V512-NEXT:    vsrl.vi v8, v8, 1
-; V512-NEXT:    vmv.v.i v0, 10
 ; V512-NEXT:    vadd.vi v8, v8, 1
+; V512-NEXT:    vmv.v.i v0, 10
 ; V512-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; V512-NEXT:    vmv1r.v v8, v10
 ; V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 0e8d9cf0306690..58af6ac246d161 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v11, (a0)
-; CHECK-NEXT:    vmv.v.i v0, 8
 ; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vmv.v.i v0, 8
 ; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
@@ -162,16 +162,16 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v16, 2
-; RV32-NEXT:    li a0, 5
-; RV32-NEXT:    vslide1down.vx v20, v16, a0
 ; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
 ; RV32-NEXT:    a...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/88295

