[llvm] [RISCV] Introduce VLOptimizer pass (PR #108640)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 11 06:44:27 PDT 2024


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/108640

From 4f504d7a1988cfe8cecc7d6bbe178d699e1cf7fd Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 27 Jun 2023 09:19:36 -0700
Subject: [PATCH 01/17] [RISCV] Add VLOptimizer pass

The purpose of this optimization is to make the VL argument, for instructions
that have a VL argument, as small as possible. This is implemented by
visiting each instruction in reverse order and, if it has a VL argument,
checking whether that VL can be reduced.

This is done before vsetvli insertion to reduce the number of generated
vsetvlis. It can also reduce the number of vsetvli instructions that toggle
the VL (the vtype may still need to be set).

The list of supported instructions is currently whitelisted for
safety. In the future, we could add more instructions to isSupportedInstr
to support even more VL optimization.
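
For example (schematic MIR, operand lists simplified):

  %x = PseudoVADD_VV_M1 ..., -1, ...     ; VL operand is VLMAX (-1)
  PseudoVSE32_V_M1 %x, %ptr, %avl, ...   ; only %avl elements are stored

Because the only user of %x reads at most %avl elements with the same EEW
and EMUL as the vadd's result, the VL operand of the PseudoVADD_VV_M1 can
be reduced to %avl, which lets both instructions share a single vsetvli.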

Co-authored-by: Craig Topper <craig.topper at sifive.com>
Co-authored-by: Kito Cheng <kito.cheng at sifive.com>
---
 llvm/lib/Target/RISCV/CMakeLists.txt          |    1 +
 llvm/lib/Target/RISCV/RISCV.h                 |    3 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |    9 +-
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    | 1569 +++++++++++++++++
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |    3 +-
 .../CodeGen/RISCV/rvv/narrow-shift-extend.ll  |   54 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll   |   54 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll        |    2 -
 llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll       |    2 -
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll     |   42 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll     |   36 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll    |   45 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll    |   45 +-
 llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll |    6 +-
 llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll        |    2 -
 llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll       |    2 -
 llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll        |    2 -
 llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll       |    2 -
 llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll        |    7 +-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll |  408 ++---
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     |  434 ++---
 llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll        |    2 -
 llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll       |    2 -
 .../RISCV/rvv/vsetvli-insert-crossbb.ll       |    3 -
 llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll        |    3 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll    |    4 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll        |    2 -
 llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll        |    2 -
 llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll       |    3 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll      |    3 +-
 llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll       |   90 +-
 31 files changed, 2098 insertions(+), 744 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 574f1756cc733d..fd049d1a57860e 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -59,6 +59,7 @@ add_llvm_target(RISCVCodeGen
   RISCVTargetObjectFile.cpp
   RISCVTargetTransformInfo.cpp
   RISCVVectorPeephole.cpp
+  RISCVVLOptimizer.cpp
   RISCVZacasABIFix.cpp
   GISel/RISCVCallLowering.cpp
   GISel/RISCVInstructionSelector.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 482c0cce78b10f..d7bab601d545cc 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -102,6 +102,9 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
 
 FunctionPass *createRISCVPreLegalizerCombiner();
 void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
+
+FunctionPass *createRISCVVLOptimizerPass();
+void initializeRISCVVLOptimizerPass(PassRegistry &);
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 6a72857b93b6c7..9273b305f1b8bf 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -103,6 +103,10 @@ static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
     cl::desc("Insert vsetvls after vector register allocation"),
     cl::init(true));
 
+static cl::opt<bool> EnableVLOptimizer("riscv-enable-vloptimizer",
+                                       cl::desc("Enable the VL Optimizer pass"),
+                                       cl::init(true), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -551,8 +555,11 @@ void RISCVPassConfig::addMachineSSAOptimization() {
 
 void RISCVPassConfig::addPreRegAlloc() {
   addPass(createRISCVPreRAExpandPseudoPass());
-  if (TM->getOptLevel() != CodeGenOptLevel::None)
+  if (TM->getOptLevel() != CodeGenOptLevel::None) {
     addPass(createRISCVMergeBaseOffsetOptPass());
+    if (EnableVLOptimizer)
+      addPass(createRISCVVLOptimizerPass());
+  }
 
   addPass(createRISCVInsertReadWriteCSRPass());
   addPass(createRISCVInsertWriteVXRMPass());
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
new file mode 100644
index 00000000000000..ae23d5dda5eb04
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -0,0 +1,1569 @@
+//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass reduces the VL where possible at the MI level, before VSETVLI
+// instructions are inserted.
+//
+// The purpose of this optimization is to make the VL argument, for instructions
+// that have a VL argument, as small as possible. This is implemented by
+// visiting each instruction in reverse order and, if it has a VL argument,
+// checking whether that VL can be reduced.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-vl-optimizer"
+
+namespace {
+
+class RISCVVLOptimizer : public MachineFunctionPass {
+  const MachineRegisterInfo *MRI;
+  const MachineDominatorTree *MDT;
+
+public:
+  static char ID;
+
+  RISCVVLOptimizer() : MachineFunctionPass(ID) { }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTreeWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return "RISC-V VL Optimizer"; }
+
+private:
+  bool tryReduceVL(MachineInstr &MI);
+  bool isCandidate(const MachineInstr &MI) const;
+};
+
+} // end anonymous namespace
+
+char RISCVVLOptimizer::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, "RISC-V VL Optimizer",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(RISCVVLOptimizer, DEBUG_TYPE, "RISC-V VL Optimizer", false,
+                    false)
+
+FunctionPass *llvm::createRISCVVLOptimizerPass() {
+  return new RISCVVLOptimizer();
+}
+
+/// Return true if R is a physical or virtual vector register, false otherwise.
+static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI) {
+  if (R.isPhysical())
+    return RISCV::VRRegClass.contains(R);
+  const TargetRegisterClass *RC = MRI->getRegClass(R);
+  return RISCV::VRRegClass.hasSubClassEq(RC) ||
+         RISCV::VRM2RegClass.hasSubClassEq(RC) ||
+         RISCV::VRM4RegClass.hasSubClassEq(RC) ||
+         RISCV::VRM8RegClass.hasSubClassEq(RC);
+}
+
+/// Represents the EMUL and EEW of a MachineOperand.
+struct OperandInfo {
+  enum class State {
+    Unknown,
+    Known,
+  } S;
+
+  // Represent as 1,2,4,8, ... and fractional indicator. This is because
+  // EMUL can take on values that don't map to RISCVII::VLMUL values exactly.
+  // For example, a mask operand can have an EMUL less than MF8.
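+  // For example, a mask operand of an instruction with SEW=64 and LMUL=1/2
+  // has EMUL = (1/64) * (1/2) = 1/128, represented here as {128, true}.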
+  std::pair<unsigned, bool> EMUL;
+
+  unsigned Log2EEW;
+
+  OperandInfo(RISCVII::VLMUL EMUL, unsigned Log2EEW)
+      : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {
+  }
+
+  OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
+      : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {}
+
+  OperandInfo(State S) : S(S) {
+    assert(S != State::Known &&
+           "This constructor may only be used to construct "
+           "an Unknown OperandInfo");
+  }
+
+  bool isUnknown() const { return S == State::Unknown; }
+  bool isKnown() const { return S == State::Known; }
+
+  static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
+    assert(A.isKnown() && B.isKnown() && "Both operands must be known");
+    return A.Log2EEW == B.Log2EEW && A.EMUL.first == B.EMUL.first &&
+           A.EMUL.second == B.EMUL.second;
+  }
+
+  void print(raw_ostream &OS) const {
+    if (isUnknown()) {
+      OS << "Unknown";
+      return;
+    }
+    OS << "EMUL: m";
+    if (EMUL.second)
+      OS << "f";
+    OS << EMUL.first;
+    OS << ", EEW: " << (1 << Log2EEW);
+  }
+};
+
+static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
+  OI.print(OS);
+  return OS;
+}
+
+/// Return the RISCVII::VLMUL that is two times VLMul.
+/// Precondition: VLMul is not LMUL_RESERVED or LMUL_8.
+static RISCVII::VLMUL twoTimesVLMUL(RISCVII::VLMUL VLMul) {
+  switch (VLMul) {
+  case RISCVII::VLMUL::LMUL_F8:
+    return RISCVII::VLMUL::LMUL_F4;
+  case RISCVII::VLMUL::LMUL_F4:
+    return RISCVII::VLMUL::LMUL_F2;
+  case RISCVII::VLMUL::LMUL_F2:
+    return RISCVII::VLMUL::LMUL_1;
+  case RISCVII::VLMUL::LMUL_1:
+    return RISCVII::VLMUL::LMUL_2;
+  case RISCVII::VLMUL::LMUL_2:
+    return RISCVII::VLMUL::LMUL_4;
+  case RISCVII::VLMUL::LMUL_4:
+    return RISCVII::VLMUL::LMUL_8;
+  case RISCVII::VLMUL::LMUL_8:
+  default:
+    llvm_unreachable("Could not multiply VLMul by 2");
+  }
+}
+
+/// Return the RISCVII::VLMUL that is VLMul / 2.
+/// Precondition: VLMul is not LMUL_RESERVED or LMUL_MF8.
+static RISCVII::VLMUL halfVLMUL(RISCVII::VLMUL VLMul) {
+  switch (VLMul) {
+  case RISCVII::VLMUL::LMUL_F4:
+    return RISCVII::VLMUL::LMUL_F8;
+  case RISCVII::VLMUL::LMUL_F2:
+    return RISCVII::VLMUL::LMUL_F4;
+  case RISCVII::VLMUL::LMUL_1:
+    return RISCVII::VLMUL::LMUL_F2;
+  case RISCVII::VLMUL::LMUL_2:
+    return RISCVII::VLMUL::LMUL_1;
+  case RISCVII::VLMUL::LMUL_4:
+    return RISCVII::VLMUL::LMUL_2;
+  case RISCVII::VLMUL::LMUL_8:
+    return RISCVII::VLMUL::LMUL_4;
+  case RISCVII::VLMUL::LMUL_F8:
+  default:
+    llvm_unreachable("Could not divide VLMul by 2");
+  }
+}
+
+/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
+/// SEW are from the TSFlags of MI.
+static std::pair<unsigned, bool>
+getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) {
+  RISCVII::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
+  auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
+  unsigned MILog2SEW =
+      MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+  unsigned MISEW = 1 << MILog2SEW;
+
+  unsigned EEW = 1 << Log2EEW;
+  // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
+  // to put fraction in simplest form.
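+  // For example, EEW=32, SEW=8, LMUL=1/2 gives (32/8) * (1/2) = 2, i.e.
+  // {2, false}; EEW=8, SEW=32, LMUL=1 gives (8/32) * 1 = 1/4, i.e. {4, true}.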
+  unsigned Num = EEW, Denom = MISEW;
+  int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL)
+                               : std::gcd(Num * MILMUL, Denom);
+  Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD;
+  Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
+  return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
+}
+
+static bool isOpN(const MachineOperand &MO, unsigned OpN) {
+  const MachineInstr &MI = *MO.getParent();
+  bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
+
+  if (HasPassthru)
+    return MO.getOperandNo() == OpN + 1;
+
+  return MO.getOperandNo() == OpN;
+}
+
+/// An indexed segment load or store instruction has the form
+/// v.*seg<nf>ei<eew>.v. Data has EEW=SEW, EMUL=LMUL. Index has EEW=<eew>,
+/// EMUL=(EEW/SEW)*LMUL. LMUL and SEW come from the TSFlags of MI.
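+/// For example, for vluxseg2ei16.v with SEW=32 and LMUL=2 (values chosen for
+/// illustration), the data operand has EEW=32 and EMUL=2, while the index
+/// operand has EEW=16 and EMUL=(16/32)*2=1.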
+static OperandInfo getIndexSegmentLoadStoreOperandInfo(unsigned Log2EEW,
+                                                       const MachineInstr &MI,
+                                                       const MachineOperand &MO,
+                                                       bool IsLoad) {
+  // Operand 0 is data register
+  // Data vector register group has EEW=SEW, EMUL=LMUL.
+  if (MO.getOperandNo() == 0) {
+    RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+    unsigned MILog2SEW =
+        MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+    return OperandInfo(MIVLMul, MILog2SEW);
+  }
+
+  // Operand 2 is index vector register
+  // v.*seg<nf>ei<eew>.v
+  // Index vector register group has EEW=<eew>, EMUL=(EEW/SEW)*LMUL.
+  if (isOpN(MO, 2))
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW);
+
+  llvm_unreachable("Could not get OperandInfo for non-vector register of an "
+                   "indexed segment load or store instruction");
+}
+
+/// Dest has EEW=SEW and EMUL=LMUL. Source has EEW=SEW/Factor (e.g. VF2 =>
+/// EEW=SEW/2) and EMUL=(EEW/SEW)*LMUL. LMUL and SEW come from the TSFlags of
+/// MI.
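+/// For example, for vzext.vf2 with SEW=32 and LMUL=2 (values chosen for
+/// illustration), the source has EEW=16 and EMUL=(16/32)*2=1.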
+static OperandInfo getIntegerExtensionOperandInfo(unsigned Factor,
+                                                  const MachineInstr &MI,
+                                                  const MachineOperand &MO) {
+  RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+  unsigned MILog2SEW =
+      MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+
+  if (MO.getOperandNo() == 0)
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  unsigned MISEW = 1 << MILog2SEW;
+  unsigned EEW = MISEW / Factor;
+  unsigned Log2EEW = Log2_32(EEW);
+
+  return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW);
+}
+
+/// Check whether MO is a mask operand of MI.
+static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO,
+                          const MachineRegisterInfo *MRI) {
+
+  if (!MO.isReg() || !isVectorRegClass(MO.getReg(), MRI))
+    return false;
+
+  const MCInstrDesc &Desc = MI.getDesc();
+  return Desc.operands()[MO.getOperandNo()].RegClass == RISCV::VMV0RegClassID;
+}
+
+/// Return the OperandInfo for MO, which is an operand of MI.
+static OperandInfo getOperandInfo(const MachineInstr &MI,
+                                  const MachineOperand &MO,
+                                  const MachineRegisterInfo *MRI) {
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+  assert(RVV && "Could not find MI in PseudoTable");
+
+  // MI has a VLMUL and SEW associated with it. The RVV specification defines
+  // the LMUL and SEW of each operand and definition in relation to MI.VLMUL and
+  // MI.SEW.
+  RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+  unsigned MILog2SEW =
+      MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+
+  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
+
+  // We bail out early for instructions whose passthru operand is not
+  // NoRegister, which means they are using TU policy. We are not interested in
+  // these since they must preserve the entire register content.
+  if (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs() &&
+      (MO.getReg() != RISCV::NoRegister))
+    return OperandInfo(OperandInfo::State::Unknown);
+
+  bool IsMODef = MO.getOperandNo() == 0;
+  bool IsOp1 = isOpN(MO, 1);
+  bool IsOp2 = isOpN(MO, 2);
+  bool IsOp3 = isOpN(MO, 3);
+
+  // All mask operands have EEW=1, EMUL=(EEW/SEW)*LMUL
+  if (isMaskOperand(MI, MO, MRI))
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+
+  // Switch against BaseInstr to reduce the number of cases that need to be
+  // considered.
+  switch (RVV->BaseInstr) {
+
+  // 6. Configuration-Setting Instructions
+  // Configuration setting instructions do not read or write vector registers
+  case RISCV::VSETIVLI:
+  case RISCV::VSETVL:
+  case RISCV::VSETVLI:
+    llvm_unreachable("Configuration setting instructions do not read or write "
+                     "vector registers");
+
+  // 7. Vector Loads and Stores
+  // 7.4. Vector Unit-Stride Instructions
+  // 7.5. Vector Strided Instructions
+  // 7.7. Unit-stride Fault-Only-First Loads
+  // Dest EEW is encoded in the instruction and EMUL=(EEW/SEW)*LMUL.
+  case RISCV::VLE8_V:
+  case RISCV::VSE8_V:
+  case RISCV::VLM_V:
+  case RISCV::VSM_V:
+  case RISCV::VLSE8_V:
+  case RISCV::VSSE8_V:
+  case RISCV::VLE8FF_V:
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+  case RISCV::VLE16_V:
+  case RISCV::VSE16_V:
+  case RISCV::VLSE16_V:
+  case RISCV::VSSE16_V:
+  case RISCV::VLE16FF_V:
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+  case RISCV::VLE32_V:
+  case RISCV::VSE32_V:
+  case RISCV::VLSE32_V:
+  case RISCV::VSSE32_V:
+  case RISCV::VLE32FF_V:
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+  case RISCV::VLE64_V:
+  case RISCV::VSE64_V:
+  case RISCV::VLSE64_V:
+  case RISCV::VSSE64_V:
+  case RISCV::VLE64FF_V:
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
+
+  // 7.6. Vector Indexed Instructions
+  // Data EEW=SEW, EMUL=LMUL. Index EEW=<eew> and EMUL=(EEW/SEW)*LMUL
+  case RISCV::VLUXEI8_V:
+  case RISCV::VLOXEI8_V:
+  case RISCV::VSUXEI8_V:
+  case RISCV::VSOXEI8_V:
+    if (MO.getOperandNo() == 0)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+  case RISCV::VLUXEI16_V:
+  case RISCV::VLOXEI16_V:
+  case RISCV::VSUXEI16_V:
+  case RISCV::VSOXEI16_V:
+    if (MO.getOperandNo() == 0)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+  case RISCV::VLUXEI32_V:
+  case RISCV::VLOXEI32_V:
+  case RISCV::VSUXEI32_V:
+  case RISCV::VSOXEI32_V:
+    if (MO.getOperandNo() == 0)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+  case RISCV::VLUXEI64_V:
+  case RISCV::VLOXEI64_V:
+  case RISCV::VSUXEI64_V:
+  case RISCV::VSOXEI64_V:
+    if (MO.getOperandNo() == 0)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
+
+  // 7.8. Vector Load/Store Segment Instructions
+  // 7.8.1. Vector Unit-Stride Segment Loads and Stores
+  // v.*seg<nf>e<eew>.*
+  // EEW=eew, EMUL=LMUL
+  case RISCV::VLSEG2E8_V:
+  case RISCV::VLSEG2E8FF_V:
+  case RISCV::VLSEG3E8_V:
+  case RISCV::VLSEG3E8FF_V:
+  case RISCV::VLSEG4E8_V:
+  case RISCV::VLSEG4E8FF_V:
+  case RISCV::VLSEG5E8_V:
+  case RISCV::VLSEG5E8FF_V:
+  case RISCV::VLSEG6E8_V:
+  case RISCV::VLSEG6E8FF_V:
+  case RISCV::VLSEG7E8_V:
+  case RISCV::VLSEG7E8FF_V:
+  case RISCV::VLSEG8E8_V:
+  case RISCV::VLSEG8E8FF_V:
+  case RISCV::VSSEG2E8_V:
+  case RISCV::VSSEG3E8_V:
+  case RISCV::VSSEG4E8_V:
+  case RISCV::VSSEG5E8_V:
+  case RISCV::VSSEG6E8_V:
+  case RISCV::VSSEG7E8_V:
+  case RISCV::VSSEG8E8_V:
+    return OperandInfo(MIVLMul, 3);
+  case RISCV::VLSEG2E16_V:
+  case RISCV::VLSEG2E16FF_V:
+  case RISCV::VLSEG3E16_V:
+  case RISCV::VLSEG3E16FF_V:
+  case RISCV::VLSEG4E16_V:
+  case RISCV::VLSEG4E16FF_V:
+  case RISCV::VLSEG5E16_V:
+  case RISCV::VLSEG5E16FF_V:
+  case RISCV::VLSEG6E16_V:
+  case RISCV::VLSEG6E16FF_V:
+  case RISCV::VLSEG7E16_V:
+  case RISCV::VLSEG7E16FF_V:
+  case RISCV::VLSEG8E16_V:
+  case RISCV::VLSEG8E16FF_V:
+  case RISCV::VSSEG2E16_V:
+  case RISCV::VSSEG3E16_V:
+  case RISCV::VSSEG4E16_V:
+  case RISCV::VSSEG5E16_V:
+  case RISCV::VSSEG6E16_V:
+  case RISCV::VSSEG7E16_V:
+  case RISCV::VSSEG8E16_V:
+    return OperandInfo(MIVLMul, 4);
+  case RISCV::VLSEG2E32_V:
+  case RISCV::VLSEG2E32FF_V:
+  case RISCV::VLSEG3E32_V:
+  case RISCV::VLSEG3E32FF_V:
+  case RISCV::VLSEG4E32_V:
+  case RISCV::VLSEG4E32FF_V:
+  case RISCV::VLSEG5E32_V:
+  case RISCV::VLSEG5E32FF_V:
+  case RISCV::VLSEG6E32_V:
+  case RISCV::VLSEG6E32FF_V:
+  case RISCV::VLSEG7E32_V:
+  case RISCV::VLSEG7E32FF_V:
+  case RISCV::VLSEG8E32_V:
+  case RISCV::VLSEG8E32FF_V:
+  case RISCV::VSSEG2E32_V:
+  case RISCV::VSSEG3E32_V:
+  case RISCV::VSSEG4E32_V:
+  case RISCV::VSSEG5E32_V:
+  case RISCV::VSSEG6E32_V:
+  case RISCV::VSSEG7E32_V:
+  case RISCV::VSSEG8E32_V:
+    return OperandInfo(MIVLMul, 5);
+  case RISCV::VLSEG2E64_V:
+  case RISCV::VLSEG2E64FF_V:
+  case RISCV::VLSEG3E64_V:
+  case RISCV::VLSEG3E64FF_V:
+  case RISCV::VLSEG4E64_V:
+  case RISCV::VLSEG4E64FF_V:
+  case RISCV::VLSEG5E64_V:
+  case RISCV::VLSEG5E64FF_V:
+  case RISCV::VLSEG6E64_V:
+  case RISCV::VLSEG6E64FF_V:
+  case RISCV::VLSEG7E64_V:
+  case RISCV::VLSEG7E64FF_V:
+  case RISCV::VLSEG8E64_V:
+  case RISCV::VLSEG8E64FF_V:
+  case RISCV::VSSEG2E64_V:
+  case RISCV::VSSEG3E64_V:
+  case RISCV::VSSEG4E64_V:
+  case RISCV::VSSEG5E64_V:
+  case RISCV::VSSEG6E64_V:
+  case RISCV::VSSEG7E64_V:
+  case RISCV::VSSEG8E64_V:
+    return OperandInfo(MIVLMul, 6);
+
+  // 7.8.2. Vector Strided Segment Loads and Stores
+  case RISCV::VLSSEG2E8_V:
+  case RISCV::VLSSEG3E8_V:
+  case RISCV::VLSSEG4E8_V:
+  case RISCV::VLSSEG5E8_V:
+  case RISCV::VLSSEG6E8_V:
+  case RISCV::VLSSEG7E8_V:
+  case RISCV::VLSSEG8E8_V:
+  case RISCV::VSSSEG2E8_V:
+  case RISCV::VSSSEG3E8_V:
+  case RISCV::VSSSEG4E8_V:
+  case RISCV::VSSSEG5E8_V:
+  case RISCV::VSSSEG6E8_V:
+  case RISCV::VSSSEG7E8_V:
+  case RISCV::VSSSEG8E8_V:
+    return OperandInfo(MIVLMul, 3);
+  case RISCV::VLSSEG2E16_V:
+  case RISCV::VLSSEG3E16_V:
+  case RISCV::VLSSEG4E16_V:
+  case RISCV::VLSSEG5E16_V:
+  case RISCV::VLSSEG6E16_V:
+  case RISCV::VLSSEG7E16_V:
+  case RISCV::VLSSEG8E16_V:
+  case RISCV::VSSSEG2E16_V:
+  case RISCV::VSSSEG3E16_V:
+  case RISCV::VSSSEG4E16_V:
+  case RISCV::VSSSEG5E16_V:
+  case RISCV::VSSSEG6E16_V:
+  case RISCV::VSSSEG7E16_V:
+  case RISCV::VSSSEG8E16_V:
+    return OperandInfo(MIVLMul, 4);
+  case RISCV::VLSSEG2E32_V:
+  case RISCV::VLSSEG3E32_V:
+  case RISCV::VLSSEG4E32_V:
+  case RISCV::VLSSEG5E32_V:
+  case RISCV::VLSSEG6E32_V:
+  case RISCV::VLSSEG7E32_V:
+  case RISCV::VLSSEG8E32_V:
+  case RISCV::VSSSEG2E32_V:
+  case RISCV::VSSSEG3E32_V:
+  case RISCV::VSSSEG4E32_V:
+  case RISCV::VSSSEG5E32_V:
+  case RISCV::VSSSEG6E32_V:
+  case RISCV::VSSSEG7E32_V:
+  case RISCV::VSSSEG8E32_V:
+    return OperandInfo(MIVLMul, 5);
+  case RISCV::VLSSEG2E64_V:
+  case RISCV::VLSSEG3E64_V:
+  case RISCV::VLSSEG4E64_V:
+  case RISCV::VLSSEG5E64_V:
+  case RISCV::VLSSEG6E64_V:
+  case RISCV::VLSSEG7E64_V:
+  case RISCV::VLSSEG8E64_V:
+  case RISCV::VSSSEG2E64_V:
+  case RISCV::VSSSEG3E64_V:
+  case RISCV::VSSSEG4E64_V:
+  case RISCV::VSSSEG5E64_V:
+  case RISCV::VSSSEG6E64_V:
+  case RISCV::VSSSEG7E64_V:
+  case RISCV::VSSSEG8E64_V:
+    return OperandInfo(MIVLMul, 6);
+
+  // 7.8.3. Vector Indexed Segment Loads and Stores
+  case RISCV::VLUXSEG2EI8_V:
+  case RISCV::VLUXSEG3EI8_V:
+  case RISCV::VLUXSEG4EI8_V:
+  case RISCV::VLUXSEG5EI8_V:
+  case RISCV::VLUXSEG6EI8_V:
+  case RISCV::VLUXSEG7EI8_V:
+  case RISCV::VLUXSEG8EI8_V:
+  case RISCV::VLOXSEG2EI8_V:
+  case RISCV::VLOXSEG3EI8_V:
+  case RISCV::VLOXSEG4EI8_V:
+  case RISCV::VLOXSEG5EI8_V:
+  case RISCV::VLOXSEG6EI8_V:
+  case RISCV::VLOXSEG7EI8_V:
+  case RISCV::VLOXSEG8EI8_V:
+    return getIndexSegmentLoadStoreOperandInfo(3, MI, MO, /* IsLoad */ true);
+  case RISCV::VSUXSEG2EI8_V:
+  case RISCV::VSUXSEG3EI8_V:
+  case RISCV::VSUXSEG4EI8_V:
+  case RISCV::VSUXSEG5EI8_V:
+  case RISCV::VSUXSEG6EI8_V:
+  case RISCV::VSUXSEG7EI8_V:
+  case RISCV::VSUXSEG8EI8_V:
+  case RISCV::VSOXSEG2EI8_V:
+  case RISCV::VSOXSEG3EI8_V:
+  case RISCV::VSOXSEG4EI8_V:
+  case RISCV::VSOXSEG5EI8_V:
+  case RISCV::VSOXSEG6EI8_V:
+  case RISCV::VSOXSEG7EI8_V:
+  case RISCV::VSOXSEG8EI8_V:
+    return getIndexSegmentLoadStoreOperandInfo(3, MI, MO, /* IsLoad */ false);
+  case RISCV::VLUXSEG2EI16_V:
+  case RISCV::VLUXSEG3EI16_V:
+  case RISCV::VLUXSEG4EI16_V:
+  case RISCV::VLUXSEG5EI16_V:
+  case RISCV::VLUXSEG6EI16_V:
+  case RISCV::VLUXSEG7EI16_V:
+  case RISCV::VLUXSEG8EI16_V:
+  case RISCV::VLOXSEG2EI16_V:
+  case RISCV::VLOXSEG3EI16_V:
+  case RISCV::VLOXSEG4EI16_V:
+  case RISCV::VLOXSEG5EI16_V:
+  case RISCV::VLOXSEG6EI16_V:
+  case RISCV::VLOXSEG7EI16_V:
+  case RISCV::VLOXSEG8EI16_V:
+    return getIndexSegmentLoadStoreOperandInfo(4, MI, MO, /* IsLoad */ true);
+  case RISCV::VSUXSEG2EI16_V:
+  case RISCV::VSUXSEG3EI16_V:
+  case RISCV::VSUXSEG4EI16_V:
+  case RISCV::VSUXSEG5EI16_V:
+  case RISCV::VSUXSEG6EI16_V:
+  case RISCV::VSUXSEG7EI16_V:
+  case RISCV::VSUXSEG8EI16_V:
+  case RISCV::VSOXSEG2EI16_V:
+  case RISCV::VSOXSEG3EI16_V:
+  case RISCV::VSOXSEG4EI16_V:
+  case RISCV::VSOXSEG5EI16_V:
+  case RISCV::VSOXSEG6EI16_V:
+  case RISCV::VSOXSEG7EI16_V:
+  case RISCV::VSOXSEG8EI16_V:
+    return getIndexSegmentLoadStoreOperandInfo(4, MI, MO, /* IsLoad */ false);
+  case RISCV::VLUXSEG2EI32_V:
+  case RISCV::VLUXSEG3EI32_V:
+  case RISCV::VLUXSEG4EI32_V:
+  case RISCV::VLUXSEG5EI32_V:
+  case RISCV::VLUXSEG6EI32_V:
+  case RISCV::VLUXSEG7EI32_V:
+  case RISCV::VLUXSEG8EI32_V:
+  case RISCV::VLOXSEG2EI32_V:
+  case RISCV::VLOXSEG3EI32_V:
+  case RISCV::VLOXSEG4EI32_V:
+  case RISCV::VLOXSEG5EI32_V:
+  case RISCV::VLOXSEG6EI32_V:
+  case RISCV::VLOXSEG7EI32_V:
+  case RISCV::VLOXSEG8EI32_V:
+    return getIndexSegmentLoadStoreOperandInfo(5, MI, MO, /* IsLoad */ true);
+  case RISCV::VSUXSEG2EI32_V:
+  case RISCV::VSUXSEG3EI32_V:
+  case RISCV::VSUXSEG4EI32_V:
+  case RISCV::VSUXSEG5EI32_V:
+  case RISCV::VSUXSEG6EI32_V:
+  case RISCV::VSUXSEG7EI32_V:
+  case RISCV::VSUXSEG8EI32_V:
+  case RISCV::VSOXSEG2EI32_V:
+  case RISCV::VSOXSEG3EI32_V:
+  case RISCV::VSOXSEG4EI32_V:
+  case RISCV::VSOXSEG5EI32_V:
+  case RISCV::VSOXSEG6EI32_V:
+  case RISCV::VSOXSEG7EI32_V:
+  case RISCV::VSOXSEG8EI32_V:
+    return getIndexSegmentLoadStoreOperandInfo(5, MI, MO, /* IsLoad */ false);
+  case RISCV::VLUXSEG2EI64_V:
+  case RISCV::VLUXSEG3EI64_V:
+  case RISCV::VLUXSEG4EI64_V:
+  case RISCV::VLUXSEG5EI64_V:
+  case RISCV::VLUXSEG6EI64_V:
+  case RISCV::VLUXSEG7EI64_V:
+  case RISCV::VLUXSEG8EI64_V:
+  case RISCV::VLOXSEG2EI64_V:
+  case RISCV::VLOXSEG3EI64_V:
+  case RISCV::VLOXSEG4EI64_V:
+  case RISCV::VLOXSEG5EI64_V:
+  case RISCV::VLOXSEG6EI64_V:
+  case RISCV::VLOXSEG7EI64_V:
+  case RISCV::VLOXSEG8EI64_V:
+    return getIndexSegmentLoadStoreOperandInfo(6, MI, MO, /* IsLoad */ true);
+  case RISCV::VSUXSEG2EI64_V:
+  case RISCV::VSUXSEG3EI64_V:
+  case RISCV::VSUXSEG4EI64_V:
+  case RISCV::VSUXSEG5EI64_V:
+  case RISCV::VSUXSEG6EI64_V:
+  case RISCV::VSUXSEG7EI64_V:
+  case RISCV::VSUXSEG8EI64_V:
+  case RISCV::VSOXSEG2EI64_V:
+  case RISCV::VSOXSEG3EI64_V:
+  case RISCV::VSOXSEG4EI64_V:
+  case RISCV::VSOXSEG5EI64_V:
+  case RISCV::VSOXSEG6EI64_V:
+  case RISCV::VSOXSEG7EI64_V:
+  case RISCV::VSOXSEG8EI64_V:
+    return getIndexSegmentLoadStoreOperandInfo(6, MI, MO, /* IsLoad */ false);
+
+  // 7.9. Vector Load/Store Whole Register Instructions
+  // EMUL=nr. EEW=eew. Since in-register byte layouts are identical to
+  // in-memory byte layouts, the same data is written to the destination
+  // register regardless of EEW. eew is just a hint to the hardware and has no
+  // functional impact. Therefore, it is okay to ignore eew and always use the
+  // same EEW to create more optimization opportunities.
+  // FIXME: Instead of using any SEW, we really ought to return the SEW in the
+  // instruction and add a field to OperandInfo that says the SEW is just a
+  // hint, so that this optimization can use any SEW to construct a ratio.
+  case RISCV::VL1RE8_V:
+  case RISCV::VL1RE16_V:
+  case RISCV::VL1RE32_V:
+  case RISCV::VL1RE64_V:
+  case RISCV::VS1R_V:
+    return OperandInfo(RISCVII::VLMUL::LMUL_1, 0);
+  case RISCV::VL2RE8_V:
+  case RISCV::VL2RE16_V:
+  case RISCV::VL2RE32_V:
+  case RISCV::VL2RE64_V:
+  case RISCV::VS2R_V:
+    return OperandInfo(RISCVII::VLMUL::LMUL_2, 0);
+  case RISCV::VL4RE8_V:
+  case RISCV::VL4RE16_V:
+  case RISCV::VL4RE32_V:
+  case RISCV::VL4RE64_V:
+  case RISCV::VS4R_V:
+    return OperandInfo(RISCVII::VLMUL::LMUL_4, 0);
+  case RISCV::VL8RE8_V:
+  case RISCV::VL8RE16_V:
+  case RISCV::VL8RE32_V:
+  case RISCV::VL8RE64_V:
+  case RISCV::VS8R_V:
+    return OperandInfo(RISCVII::VLMUL::LMUL_8, 0);
+
+  // 11. Vector Integer Arithmetic Instructions
+  // 11.1. Vector Single-Width Integer Add and Subtract
+  case RISCV::VADD_VI:
+  case RISCV::VADD_VV:
+  case RISCV::VADD_VX:
+  case RISCV::VSUB_VV:
+  case RISCV::VSUB_VX:
+  case RISCV::VRSUB_VI:
+  case RISCV::VRSUB_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.2. Vector Widening Integer Add/Subtract
+  // Def uses EEW=2*SEW and EMUL=2*LMUL. Operands use EEW=SEW and EMUL=LMUL.
+  case RISCV::VWADDU_VV:
+  case RISCV::VWADDU_VX:
+  case RISCV::VWSUBU_VV:
+  case RISCV::VWSUBU_VX:
+  case RISCV::VWADD_VV:
+  case RISCV::VWADD_VX:
+  case RISCV::VWSUB_VV:
+  case RISCV::VWSUB_VX:
+  case RISCV::VWSLL_VI: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // Def and Op1 use EEW=2*SEW and EMUL=2*LMUL. Op2 uses EEW=SEW and EMUL=LMUL.
+  case RISCV::VWADDU_WV:
+  case RISCV::VWADDU_WX:
+  case RISCV::VWSUBU_WV:
+  case RISCV::VWSUBU_WX:
+  case RISCV::VWADD_WV:
+  case RISCV::VWADD_WX:
+  case RISCV::VWSUB_WV:
+  case RISCV::VWSUB_WX: {
+    bool TwoTimes = IsMODef || IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 11.3. Vector Integer Extension
+  case RISCV::VZEXT_VF2:
+  case RISCV::VSEXT_VF2:
+    return getIntegerExtensionOperandInfo(2, MI, MO);
+  case RISCV::VZEXT_VF4:
+  case RISCV::VSEXT_VF4:
+    return getIntegerExtensionOperandInfo(4, MI, MO);
+  case RISCV::VZEXT_VF8:
+  case RISCV::VSEXT_VF8:
+    return getIntegerExtensionOperandInfo(8, MI, MO);
+
+  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+  // EEW=SEW and EMUL=LMUL. Mask Operand EEW=1 and EMUL=(EEW/SEW)*LMUL
+  case RISCV::VADC_VIM:
+  case RISCV::VADC_VVM:
+  case RISCV::VADC_VXM:
+  case RISCV::VSBC_VVM:
+  case RISCV::VSBC_VXM:
+    return MO.getOperandNo() == 3
+               ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
+               : OperandInfo(MIVLMul, MILog2SEW);
+  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. Mask
+  // operand EEW=1 and EMUL=(EEW/SEW)*LMUL
+  case RISCV::VMADC_VIM:
+  case RISCV::VMADC_VVM:
+  case RISCV::VMADC_VXM:
+  case RISCV::VMSBC_VVM:
+  case RISCV::VMSBC_VXM:
+    return IsMODef || MO.getOperandNo() == 3
+               ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
+               : OperandInfo(MIVLMul, MILog2SEW);
+  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VMADC_VV:
+  case RISCV::VMADC_VI:
+  case RISCV::VMADC_VX:
+  case RISCV::VMSBC_VV:
+  case RISCV::VMSBC_VX:
+    return IsMODef ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
+                   : OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.5. Vector Bitwise Logical Instructions
+  // 11.6. Vector Single-Width Shift Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VAND_VI:
+  case RISCV::VAND_VV:
+  case RISCV::VAND_VX:
+  case RISCV::VOR_VI:
+  case RISCV::VOR_VV:
+  case RISCV::VOR_VX:
+  case RISCV::VXOR_VI:
+  case RISCV::VXOR_VV:
+  case RISCV::VXOR_VX:
+  case RISCV::VSLL_VI:
+  case RISCV::VSLL_VV:
+  case RISCV::VSLL_VX:
+  case RISCV::VSRL_VI:
+  case RISCV::VSRL_VV:
+  case RISCV::VSRL_VX:
+  case RISCV::VSRA_VI:
+  case RISCV::VSRA_VV:
+  case RISCV::VSRA_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.7. Vector Narrowing Integer Right Shift Instructions
+  // Destination EEW=SEW and EMUL=LMUL, Op 1 has EEW=2*SEW EMUL=2*LMUL. Op2 has
+  // EEW=SEW EMUL=LMUL.
+  case RISCV::VNSRL_WX:
+  case RISCV::VNSRL_WI:
+  case RISCV::VNSRL_WV:
+  case RISCV::VNSRA_WI:
+  case RISCV::VNSRA_WV:
+  case RISCV::VNSRA_WX: {
+    bool TwoTimes = IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 11.8. Vector Integer Compare Instructions
+  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VMSEQ_VI:
+  case RISCV::VMSEQ_VV:
+  case RISCV::VMSEQ_VX:
+  case RISCV::VMSNE_VI:
+  case RISCV::VMSNE_VV:
+  case RISCV::VMSNE_VX:
+  case RISCV::VMSLTU_VV:
+  case RISCV::VMSLTU_VX:
+  case RISCV::VMSLT_VV:
+  case RISCV::VMSLT_VX:
+  case RISCV::VMSLEU_VV:
+  case RISCV::VMSLEU_VI:
+  case RISCV::VMSLEU_VX:
+  case RISCV::VMSLE_VV:
+  case RISCV::VMSLE_VI:
+  case RISCV::VMSLE_VX:
+  case RISCV::VMSGTU_VI:
+  case RISCV::VMSGTU_VX:
+  case RISCV::VMSGT_VI:
+  case RISCV::VMSGT_VX:
+    if (IsMODef)
+      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.9. Vector Integer Min/Max Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VMINU_VV:
+  case RISCV::VMINU_VX:
+  case RISCV::VMIN_VV:
+  case RISCV::VMIN_VX:
+  case RISCV::VMAXU_VV:
+  case RISCV::VMAXU_VX:
+  case RISCV::VMAX_VV:
+  case RISCV::VMAX_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.10. Vector Single-Width Integer Multiply Instructions
+  // Source and Dest EEW=SEW and EMUL=LMUL.
+  case RISCV::VMUL_VV:
+  case RISCV::VMUL_VX:
+  case RISCV::VMULH_VV:
+  case RISCV::VMULH_VX:
+  case RISCV::VMULHU_VV:
+  case RISCV::VMULHU_VX:
+  case RISCV::VMULHSU_VV:
+  case RISCV::VMULHSU_VX: {
+    return OperandInfo(MIVLMul, MILog2SEW);
+  }
+  // 11.11. Vector Integer Divide Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VDIVU_VV:
+  case RISCV::VDIVU_VX:
+  case RISCV::VDIV_VV:
+  case RISCV::VDIV_VX:
+  case RISCV::VREMU_VV:
+  case RISCV::VREMU_VX:
+  case RISCV::VREM_VV:
+  case RISCV::VREM_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.12. Vector Widening Integer Multiply Instructions
+  // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VWMUL_VV:
+  case RISCV::VWMUL_VX:
+  case RISCV::VWMULSU_VV:
+  case RISCV::VWMULSU_VX:
+  case RISCV::VWMULU_VV:
+  case RISCV::VWMULU_VX: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VMACC_VV:
+  case RISCV::VMACC_VX:
+  case RISCV::VNMSAC_VV:
+  case RISCV::VNMSAC_VX:
+  case RISCV::VMADD_VV:
+  case RISCV::VMADD_VX:
+  case RISCV::VNMSUB_VV:
+  case RISCV::VNMSUB_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.14. Vector Widening Integer Multiply-Add Instructions
+  // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  // Even though the add is a 2*SEW addition, the operands of the add are the
+  // Dest which is 2*SEW and the result of the multiply which is 2*SEW.
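+  // For example, for vwmacc.vx with SEW=16 and LMUL=1, the destination is read
+  // and written with EEW=32 and EMUL=2, while the vector source uses EEW=16
+  // and EMUL=1.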
+  case RISCV::VWMACCU_VV:
+  case RISCV::VWMACCU_VX:
+  case RISCV::VWMACC_VV:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCSU_VV:
+  case RISCV::VWMACCSU_VX:
+  case RISCV::VWMACCUS_VX: {
+    // Operand 0 is destination as a def and Operand 1 is destination as a use
+    // due to SSA.
+    bool TwoTimes = IsMODef || IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 11.15. Vector Integer Merge Instructions
+  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
+  // (EEW/SEW)*LMUL.
+  case RISCV::VMERGE_VIM:
+  case RISCV::VMERGE_VVM:
+  case RISCV::VMERGE_VXM:
+    if (MO.getOperandNo() == 3)
+      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 11.16. Vector Integer Move Instructions
+  // 12. Vector Fixed-Point Arithmetic Instructions
+  // 12.1. Vector Single-Width Saturating Add and Subtract
+  // 12.2. Vector Single-Width Averaging Add and Subtract
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VMV_V_I:
+  case RISCV::VMV_V_V:
+  case RISCV::VMV_V_X:
+  case RISCV::VSADDU_VI:
+  case RISCV::VSADDU_VV:
+  case RISCV::VSADDU_VX:
+  case RISCV::VSADD_VI:
+  case RISCV::VSADD_VV:
+  case RISCV::VSADD_VX:
+  case RISCV::VSSUBU_VV:
+  case RISCV::VSSUBU_VX:
+  case RISCV::VSSUB_VV:
+  case RISCV::VSSUB_VX:
+  case RISCV::VAADDU_VV:
+  case RISCV::VAADDU_VX:
+  case RISCV::VAADD_VV:
+  case RISCV::VAADD_VX:
+  case RISCV::VASUBU_VV:
+  case RISCV::VASUBU_VX:
+  case RISCV::VASUB_VV:
+  case RISCV::VASUB_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+  // Destination and source EEW=SEW and EMUL=LMUL. The 2*SEW product is only an
+  // internal intermediate; the written result is SEW wide.
+  case RISCV::VSMUL_VV:
+  case RISCV::VSMUL_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+  // 12.4. Vector Single-Width Scaling Shift Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VSSRL_VI:
+  case RISCV::VSSRL_VV:
+  case RISCV::VSSRL_VX:
+  case RISCV::VSSRA_VI:
+  case RISCV::VSSRA_VV:
+  case RISCV::VSSRA_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+  // Destination and Op2 EEW=SEW and EMUL=LMUL. Op1 EEW=2*SEW and EMUL=2*LMUL.
+  case RISCV::VNCLIPU_WI:
+  case RISCV::VNCLIPU_WV:
+  case RISCV::VNCLIPU_WX:
+  case RISCV::VNCLIP_WI:
+  case RISCV::VNCLIP_WV:
+  case RISCV::VNCLIP_WX: {
+    bool TwoTimes = !IsMODef && IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 13. Vector Floating-Point Instructions
+  // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VFADD_VF:
+  case RISCV::VFADD_VV:
+  case RISCV::VFSUB_VF:
+  case RISCV::VFSUB_VV:
+  case RISCV::VFRSUB_VF:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VFWADD_VV:
+  case RISCV::VFWADD_VF:
+  case RISCV::VFWSUB_VV:
+  case RISCV::VFWSUB_VF: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // Dest and Op1 EEW=2*SEW and EMUL=2*LMUL. Op2 EEW=SEW and EMUL=LMUL.
+  case RISCV::VFWADD_WF:
+  case RISCV::VFWADD_WV:
+  case RISCV::VFWSUB_WF:
+  case RISCV::VFWSUB_WV: {
+    bool TwoTimes = IsMODef || IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VFMUL_VF:
+  case RISCV::VFMUL_VV:
+  case RISCV::VFDIV_VF:
+  case RISCV::VFDIV_VV:
+  case RISCV::VFRDIV_VF:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 13.5. Vector Widening Floating-Point Multiply
+  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VFWMUL_VF:
+  case RISCV::VFWMUL_VV: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+
+  // 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  // EEW=SEW. EMUL=LMUL.
+  // TODO: FMA instructions read 3 registers, but the MC layer only models 2
+  // source registers because it does not model that the output operand is also
+  // part of the input operand list.
+  case RISCV::VFMACC_VF:
+  case RISCV::VFMACC_VV:
+  case RISCV::VFNMACC_VF:
+  case RISCV::VFNMACC_VV:
+  case RISCV::VFMSAC_VF:
+  case RISCV::VFMSAC_VV:
+  case RISCV::VFNMSAC_VF:
+  case RISCV::VFNMSAC_VV:
+  case RISCV::VFMADD_VF:
+  case RISCV::VFMADD_VV:
+  case RISCV::VFNMADD_VF:
+  case RISCV::VFNMADD_VV:
+  case RISCV::VFMSUB_VF:
+  case RISCV::VFMSUB_VV:
+  case RISCV::VFNMSUB_VF:
+  case RISCV::VFNMSUB_VV:
+    return OperandInfo(OperandInfo::State::Unknown);
+
+  // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW EMUL=LMUL.
+  case RISCV::VFWMACC_VF:
+  case RISCV::VFWMACC_VV:
+  case RISCV::VFWNMACC_VF:
+  case RISCV::VFWNMACC_VV:
+  case RISCV::VFWMSAC_VF:
+  case RISCV::VFWMSAC_VV:
+  case RISCV::VFWNMSAC_VF:
+  case RISCV::VFWNMSAC_VV: {
+    // Operand 0 is destination as a def and Operand 1 is destination as a use
+    // due to SSA.
+    bool TwoTimes = IsMODef || IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 13.8. Vector Floating-Point Square-Root Instruction
+  // 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+  // 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+  // 13.11. Vector Floating-Point MIN/MAX Instructions
+  // 13.12. Vector Floating-Point Sign-Injection Instructions
+  // 13.14. Vector Floating-Point Classify Instruction
+  // 13.16. Vector Floating-Point Move Instruction
+  // 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+  // EEW=SEW. EMUL=LMUL.
+  case RISCV::VFSQRT_V:
+  case RISCV::VFRSQRT7_V:
+  case RISCV::VFREC7_V:
+  case RISCV::VFMIN_VF:
+  case RISCV::VFMIN_VV:
+  case RISCV::VFMAX_VF:
+  case RISCV::VFMAX_VV:
+  case RISCV::VFSGNJ_VF:
+  case RISCV::VFSGNJ_VV:
+  case RISCV::VFSGNJN_VV:
+  case RISCV::VFSGNJN_VF:
+  case RISCV::VFSGNJX_VF:
+  case RISCV::VFSGNJX_VV:
+  case RISCV::VFCLASS_V:
+  case RISCV::VFMV_V_F:
+  case RISCV::VFCVT_XU_F_V:
+  case RISCV::VFCVT_X_F_V:
+  case RISCV::VFCVT_RTZ_XU_F_V:
+  case RISCV::VFCVT_RTZ_X_F_V:
+  case RISCV::VFCVT_F_XU_V:
+  case RISCV::VFCVT_F_X_V:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 13.13. Vector Floating-Point Compare Instructions
+  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW EMUL=LMUL.
+  case RISCV::VMFEQ_VF:
+  case RISCV::VMFEQ_VV:
+  case RISCV::VMFNE_VF:
+  case RISCV::VMFNE_VV:
+  case RISCV::VMFLT_VF:
+  case RISCV::VMFLT_VV:
+  case RISCV::VMFLE_VF:
+  case RISCV::VMFLE_VV:
+  case RISCV::VMFGT_VF:
+  case RISCV::VMFGE_VF:
+    if (IsMODef)
+      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 13.15. Vector Floating-Point Merge Instruction
+  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
+  // (EEW/SEW)*LMUL.
+  case RISCV::VFMERGE_VFM:
+    if (IsOp3)
+      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VFWCVT_XU_F_V:
+  case RISCV::VFWCVT_X_F_V:
+  case RISCV::VFWCVT_RTZ_XU_F_V:
+  case RISCV::VFWCVT_RTZ_X_F_V:
+  case RISCV::VFWCVT_F_XU_V:
+  case RISCV::VFWCVT_F_X_V:
+  case RISCV::VFWCVT_F_F_V: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+  // EMUL=LMUL. Dest EEW=SEW/2. Source EEW=SEW EMUL=LMUL.
+  case RISCV::VFNCVT_XU_F_W:
+  case RISCV::VFNCVT_X_F_W:
+  case RISCV::VFNCVT_RTZ_XU_F_W:
+  case RISCV::VFNCVT_RTZ_X_F_W:
+  case RISCV::VFNCVT_F_XU_W:
+  case RISCV::VFNCVT_F_X_W:
+  case RISCV::VFNCVT_F_F_W:
+  case RISCV::VFNCVT_ROD_F_F_W: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW - 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? halfVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 14. Vector Reduction Operations
+  // 14.1. Vector Single-Width Integer Reduction Instructions
+  // We need to return Unknown since only element 0 of the reduction result is
+  // valid, but it is generated by reducing over all of the input elements.
+  // There are 3 vector sources for reductions: one for the scalar, one for the
+  // tail value, and one for the elements to reduce over. Only the one with the
+  // elements to reduce over obeys VL. The other two only read element 0 from
+  // the register.
+  case RISCV::VREDAND_VS:
+  case RISCV::VREDMAX_VS:
+  case RISCV::VREDMAXU_VS:
+  case RISCV::VREDMIN_VS:
+  case RISCV::VREDMINU_VS:
+  case RISCV::VREDOR_VS:
+  case RISCV::VREDSUM_VS:
+  case RISCV::VREDXOR_VS:
+    return OperandInfo(OperandInfo::State::Unknown);
+
+  // 14.2. Vector Widening Integer Reduction Instructions
+  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL. The source
+  // is sign- or zero-extended to 2*SEW in order to generate the 2*SEW Dest.
+  case RISCV::VWREDSUM_VS:
+  case RISCV::VWREDSUMU_VS: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+  // 14.3. Vector Single-Width Floating-Point Reduction Instructions
+  // EMUL=LMUL. EEW=SEW.
+  case RISCV::VFREDMAX_VS:
+  case RISCV::VFREDMIN_VS:
+  case RISCV::VFREDOSUM_VS:
+  case RISCV::VFREDUSUM_VS:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 14.4. Vector Widening Floating-Point Reduction Instructions
+  // Source EEW=SEW and EMUL=LMUL. Dest EEW=2*SEW and EMUL=2*LMUL.
+  case RISCV::VFWREDOSUM_VS:
+  case RISCV::VFWREDUSUM_VS: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+
+  // 15. Vector Mask Instructions
+  // 15.1. Vector Mask-Register Logical Instructions
+  // 15.2. Vector count population in mask vcpop.m
+  // 15.3. vfirst find-first-set mask bit
+  // 15.4. vmsbf.m set-before-first mask bit
+  // 15.5. vmsif.m set-including-first mask bit
+  // 15.6. vmsof.m set-only-first mask bit
+  // EEW=1 and EMUL=(EEW/SEW)*LMUL
+  case RISCV::VMAND_MM:
+  case RISCV::VMNAND_MM:
+  case RISCV::VMANDN_MM:
+  case RISCV::VMXOR_MM:
+  case RISCV::VMOR_MM:
+  case RISCV::VMNOR_MM:
+  case RISCV::VMORN_MM:
+  case RISCV::VMXNOR_MM:
+  case RISCV::VCPOP_M:
+  case RISCV::VFIRST_M:
+  case RISCV::VMSBF_M:
+  case RISCV::VMSIF_M:
+  case RISCV::VMSOF_M: {
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+  }
+  // 15.8. Vector Iota Instruction
+  // Dest EEW=SEW and EMUL=LMUL. The mask source operand has EEW=1 and
+  // EMUL=(EEW/SEW)*LMUL.
+  case RISCV::VIOTA_M: {
+    if (IsMODef)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+  }
+  // 15.9. Vector Element Index Instruction
+  // Dest EEW=SEW and EMUL=LMUL. Mask operand EEW=1 and EMUL=(EEW/SEW)*LMUL.
+  case RISCV::VID_V: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW : 0;
+    if (IsMODef)
+      return OperandInfo(MIVLMul, Log2EEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW);
+  }
+  // 16. Vector Permutation Instructions
+  // 16.1. Integer Scalar Move Instructions
+  // 16.2. Floating-Point Scalar Move Instructions
+  // EMUL=LMUL. EEW=SEW.
+  case RISCV::VMV_X_S:
+  case RISCV::VMV_S_X:
+  case RISCV::VFMV_F_S:
+  case RISCV::VFMV_S_F:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 16.3. Vector Slide Instructions
+  // EMUL=LMUL. EEW=SEW.
+  case RISCV::VSLIDEUP_VI:
+  case RISCV::VSLIDEUP_VX:
+  case RISCV::VSLIDEDOWN_VI:
+  case RISCV::VSLIDEDOWN_VX:
+  case RISCV::VSLIDE1UP_VX:
+  case RISCV::VFSLIDE1UP_VF:
+  case RISCV::VSLIDE1DOWN_VX:
+  case RISCV::VFSLIDE1DOWN_VF:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 16.4. Vector Register Gather Instructions
+  // EMUL=LMUL. EEW=SEW. For the mask operand, EEW=1 and EMUL=(EEW/SEW)*LMUL.
+  case RISCV::VRGATHER_VI:
+  case RISCV::VRGATHER_VV:
+  case RISCV::VRGATHER_VX:
+    return OperandInfo(MIVLMul, MILog2SEW);
+  // Destination EMUL=LMUL and EEW=SEW. Op2 EEW=SEW and EMUL=LMUL. Op1 EEW=16
+  // and EMUL=(16/SEW)*LMUL.
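+  // For example, with SEW=8 and LMUL=1 the index operand (vs1) has EEW=16 and
+  // EMUL=(16/8)*1=2, which can exceed the EMUL of the data operand.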
+  case RISCV::VRGATHEREI16_VV: {
+    if (IsMODef || IsOp2)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+  }
+  // 16.5. Vector Compress Instruction
+  // EMUL=LMUL. EEW=SEW.
+  case RISCV::VCOMPRESS_VM:
+    return OperandInfo(MIVLMul, MILog2SEW);
+
+  // 16.6. Whole Vector Register Move
+  case RISCV::VMV1R_V:
+  case RISCV::VMV2R_V:
+  case RISCV::VMV4R_V:
+  case RISCV::VMV8R_V:
+    llvm_unreachable("These instructions don't have pseudo versions so they "
+                     "don't have an SEW operand.");
+
+  default:
+    return OperandInfo(OperandInfo::State::Unknown);
+  }
+}
+
+/// Return true if this optimization should consider MI for VL reduction. This
+/// white-list approach simplifies this optimization for instructions that may
+/// have more complex semantics with respect to how they use VL.
+static bool isSupportedInstr(const MachineInstr &MI) {
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+
+  if (!RVV)
+    return false;
+
+  switch (RVV->BaseInstr) {
+  case RISCV::VADD_VI:
+  case RISCV::VADD_VV:
+  case RISCV::VADD_VX:
+  case RISCV::VMUL_VV:
+  case RISCV::VMUL_VX:
+  case RISCV::VSLL_VI:
+  case RISCV::VSEXT_VF2:
+  case RISCV::VSEXT_VF4:
+  case RISCV::VSEXT_VF8:
+  case RISCV::VZEXT_VF2:
+  case RISCV::VZEXT_VF4:
+  case RISCV::VZEXT_VF8:
+  case RISCV::VMV_V_I:
+  case RISCV::VMV_V_X:
+  case RISCV::VNSRL_WI:
+  case RISCV::VWADD_VV:
+  case RISCV::VWADDU_VV:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCU_VX:
+  case RISCV::VWSLL_VI:
+    return true;
+  }
+
+  return false;
+}
+
+/// Return true if MO is a vector operand but is used as a scalar operand.
+static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) {
+  MachineInstr *MI = MO.getParent();
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
+
+  if (!RVV)
+    return false;
+
+  switch (RVV->BaseInstr) {
+  // Reductions only read element 0 of vs1.
+  case RISCV::VREDAND_VS:
+  case RISCV::VREDMAX_VS:
+  case RISCV::VREDMAXU_VS:
+  case RISCV::VREDMIN_VS:
+  case RISCV::VREDMINU_VS:
+  case RISCV::VREDOR_VS:
+  case RISCV::VREDSUM_VS:
+  case RISCV::VREDXOR_VS:
+  case RISCV::VWREDSUM_VS:
+  case RISCV::VWREDSUMU_VS:
+  case RISCV::VFREDMAX_VS:
+  case RISCV::VFREDMIN_VS:
+  case RISCV::VFREDOSUM_VS:
+  case RISCV::VFREDUSUM_VS:
+  case RISCV::VFWREDOSUM_VS:
+  case RISCV::VFWREDUSUM_VS: {
+    return isOpN(MO, 2);
+  }
+  default:
+    return false;
+  }
+}
+
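+/// Return true if MI is known not to read elements of its vector source
+/// operands beyond its VL, so it is safe to shrink the VL of an instruction
+/// that defines one of MI's source operands down to MI's VL.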
+static bool safeToPropagateVL(const MachineInstr &MI) {
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+  if (!RVV)
+    return false;
+
+  switch (RVV->BaseInstr) {
+  // vslidedown instructions may read elements of the input operand that are
+  // beyond the VL.
+  case RISCV::VSLIDEDOWN_VI:
+  case RISCV::VSLIDEDOWN_VX:
+  case RISCV::VSLIDE1DOWN_VX:
+  case RISCV::VFSLIDE1DOWN_VF:
+
+  // vrgather instructions may index beyond the VL.
+  case RISCV::VRGATHER_VI:
+  case RISCV::VRGATHER_VV:
+  case RISCV::VRGATHER_VX:
+  case RISCV::VRGATHEREI16_VV:
+    return false;
+
+  default:
+    return true;
+  }
+}
+
+bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
+
+  LLVM_DEBUG(
+      dbgs() << "Check whether the instruction is a candidate for reducing VL:"
+             << MI << "\n");
+
+  const MCInstrDesc &Desc = MI.getDesc();
+  if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
+    LLVM_DEBUG(dbgs() << "  Not a candidate due to lack of a VL or SEW op\n");
+    return false;
+  }
+
+  if (MI.getNumDefs() != 1) {
+    LLVM_DEBUG(dbgs() << "  Not a candidate due to defining more than one "
+                         "register\n");
+    return false;
+  }
+  unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+  const MachineOperand &VLOp = MI.getOperand(VLOpNum);
+  if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel) {
+    LLVM_DEBUG(dbgs() << "  Not a candidate because VL is not VLMAX\n");
+    return false;
+  }
+
+  // Some instructions that produce vectors have semantics that make it more
+  // difficult to determine whether the VL can be reduced. For example, some
+  // instructions, such as reductions, may write lanes past VL to a scalar
+  // register. Other instructions, such as some loads or stores, may write
+  // lower lanes using data from higher lanes. There may be other complex
+  // semantics not mentioned here that make it hard to determine whether
+  // the VL can be optimized. As a result, a white-list of supported
+  // instructions is used. Over time, more instructions can be supported
+  // upon careful examination of their semantics under the logic in this
+  // optimization.
+  // TODO: Use a better approach than a white-list, such as adding
+  // properties to instructions using something like TSFlags.
+  if (!isSupportedInstr(MI)) {
+    LLVM_DEBUG(dbgs() << "  Not a candidate due to unsupported instruction\n");
+    return false;
+  }
+
+  return true;
+}
+
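+/// Try to reduce the VL of OrigMI: if every user of its result reads at most a
+/// common, smaller VL and the EMUL/EEW of each use matches that of the
+/// definition, rewrite OrigMI's VL operand to that common VL. On success, add
+/// OrigMI's vector source definitions to the worklist so the reduction can
+/// propagate further up the use-def chain.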
+bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
+  SetVector<MachineInstr *> Worklist;
+  Worklist.insert(&OrigMI);
+
+  bool MadeChange = false;
+  while (!Worklist.empty()) {
+    MachineInstr &MI = *Worklist.pop_back_val();
+    LLVM_DEBUG(dbgs() << "Try to reduce VL for " << MI << "\n");
+    std::optional<Register> CommonVL;
+    bool CanReduceVL = true;
+    for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
+      const MachineInstr &UserMI = *UserOp.getParent();
+      LLVM_DEBUG(dbgs() << "  Check user: " << UserMI << "\n");
+
+      // Instructions like reductions may use a vector register as a scalar
+      // register. In this case, we should treat it like a scalar register which
+      // does not impact the decision on whether to optimize VL.
+      if (isVectorOpUsedAsScalarOp(UserOp)) {
+        [[maybe_unused]] Register R = UserOp.getReg();
+        [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
+        assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
+               "Expect LMUL 1 register class for vector as scalar operands!");
+        LLVM_DEBUG(dbgs() << "    Use this operand as a scalar operand\n");
+        continue;
+      }
+
+      if (!safeToPropagateVL(UserMI)) {
+        LLVM_DEBUG(dbgs() << "    Abort due to use by an unsafe instruction\n");
+        CanReduceVL = false;
+        break;
+      }
+
+      // Tied operands might pass through.
+      if (UserOp.isTied()) {
+        LLVM_DEBUG(dbgs() << "    Abort due to the use being a tied operand\n");
+        CanReduceVL = false;
+        break;
+      }
+
+      const MCInstrDesc &Desc = UserMI.getDesc();
+      if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
+        LLVM_DEBUG(dbgs() << "    Abort due to lack of a VL or SEW operand; "
+                             "assume the user uses VLMAX.\n");
+        CanReduceVL = false;
+        break;
+      }
+
+      unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+      const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+      // Looking for a register VL that isn't X0.
+      if (!VLOp.isReg() || VLOp.getReg() == RISCV::X0) {
+        LLVM_DEBUG(dbgs() << "    Abort due to the user's VL not being a "
+                             "register other than X0.\n");
+        CanReduceVL = false;
+        break;
+      }
+
+      if (!CommonVL) {
+        CommonVL = VLOp.getReg();
+      } else if (*CommonVL != VLOp.getReg()) {
+        LLVM_DEBUG(dbgs() << "    Abort due to users have different VL!\n");
+        CanReduceVL = false;
+        break;
+      }
+
+      // The SEW and LMUL of destination and source registers need to match.
+
+      // If the produced Dest is not a vector register, then it has no EEW or
+      // EMUL, so there is no need to check that producer and consumer LMUL and
+      // SEW match. We've already checked above that UserOp is a vector
+      // register.
+      if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI)) {
+        LLVM_DEBUG(dbgs() << "    Abort due to register class mismatch between "
+                             "USE and DEF\n");
+        continue;
+      }
+
+      OperandInfo ConsumerInfo = getOperandInfo(UserMI, UserOp, MRI);
+      OperandInfo ProducerInfo = getOperandInfo(MI, MI.getOperand(0), MRI);
+      if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() ||
+          !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) {
+        LLVM_DEBUG(dbgs() << "    Abort due to incompatible or unknown "
+                             "information for EMUL or EEW.\n");
+        LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
+        LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
+        CanReduceVL = false;
+        break;
+      }
+    }
+
+    if (!CanReduceVL || !CommonVL)
+      continue;
+
+    if (!CommonVL->isVirtual()) {
+      LLVM_DEBUG(
+          dbgs() << "    Abort due to new VL is not virtual register.\n");
+      continue;
+    }
+
+    const MachineInstr *VLMI = MRI->getVRegDef(*CommonVL);
+    if (!MDT->dominates(VLMI, &MI))
+      continue;
+
+    // All our checks passed. We can reduce VL.
+    unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
+    MachineOperand &VLOp = MI.getOperand(VLOpNum);
+    VLOp.ChangeToRegister(*CommonVL, false);
+    MadeChange = true;
+
+    // Now add all inputs to this instruction to the worklist.
+    for (auto &Op : MI.operands()) {
+      if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual())
+        continue;
+
+      if (!isVectorRegClass(Op.getReg(), MRI))
+        continue;
+
+      MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
+
+      if (!isCandidate(*DefMI))
+        continue;
+
+      Worklist.insert(DefMI);
+    }
+  }
+
+  return MadeChange;
+}
+
+bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  MRI = &MF.getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasVInstructions())
+    return false;
+
+  bool MadeChange = false;
+  for (MachineBasicBlock &MBB : MF) {
+    // Visit instructions in reverse order.
+    for (auto &MI : make_range(MBB.rbegin(), MBB.rend())) {
+      if (!isCandidate(MI))
+        continue;
+
+      MadeChange |= tryReduceVL(MI);
+    }
+  }
+
+  return MadeChange;
+}
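
To make the core rewrite concrete, below is a minimal sketch of the step
tryReduceVL performs once every user of a candidate has been validated: the
producer's VLMAX sentinel operand is swapped for the common, dominating
virtual VL register. The helper name rewriteVLToCommon is hypothetical and
only for illustration; the per-user checks (supported opcode, no tied use,
compatible EMUL/EEW, a single common VL) are assumed to have been done by the
caller, as in the pass above.

#include "MCTargetDesc/RISCVBaseInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper, not part of the patch: once every user of DefMI's
// result has been shown to read the value under the same virtual VL register
// CommonVL, replace DefMI's VLMAX sentinel VL operand with that register.
static bool rewriteVLToCommon(MachineInstr &DefMI, Register CommonVL,
                              const MachineRegisterInfo &MRI,
                              const MachineDominatorTree &MDT) {
  // The new VL must be a virtual register whose definition dominates the
  // producer; otherwise its value is not available at this point.
  if (!CommonVL.isVirtual() ||
      !MDT.dominates(MRI.getVRegDef(CommonVL), &DefMI))
    return false;

  // The caller is assumed to have verified that this opcode has a VL operand
  // (RISCVII::hasVLOp) and that it currently holds the VLMAX sentinel.
  unsigned VLOpNum = RISCVII::getVLOpNum(DefMI.getDesc());
  MachineOperand &VLOp = DefMI.getOperand(VLOpNum);

  // Turn the immediate sentinel into a use (not a def) of the common VL.
  VLOp.ChangeToRegister(CommonVL, /*IsDef=*/false);
  return true;
}

In the pass itself this logic is inlined at the end of the worklist loop, and
the vector inputs of the rewritten instruction are then re-queued so the
reduction can propagate further up the use-def chain.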
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index c29f15a15c1503..9c3c6e6e42346e 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -119,6 +119,8 @@
 ; RV64-NEXT:        RISC-V Optimize W Instructions
 ; CHECK-NEXT:       RISC-V Pre-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V Merge Base Offset
+; CHECK-NEXT:       MachineDominator Tree Construction
+; CHECK-NEXT:       RISC-V VL Optimizer
 ; CHECK-NEXT:       RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT:       RISC-V Insert Write VXRM Pass
 ; CHECK-NEXT:       RISC-V Landing Pad Setup
@@ -129,7 +131,6 @@
 ; CHECK-NEXT:       Live Variable Analysis
 ; CHECK-NEXT:       Eliminate PHI nodes for register allocation
 ; CHECK-NEXT:       Two-Address instruction pass
-; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Slot index numbering
 ; CHECK-NEXT:       Live Interval Analysis
 ; CHECK-NEXT:       Register Coalescer
diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
index e47517abacb4d3..63212e7cd5e004 100644
--- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
@@ -10,10 +10,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vloxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -30,10 +30,10 @@ entry:
 define <vscale x 4 x i32> @test_vloxei2(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 14
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -50,10 +50,10 @@ entry:
 define <vscale x 4 x i32> @test_vloxei3(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 26
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -74,9 +74,8 @@ define <vscale x 4 x i32> @test_vloxei4(ptr %ptr, <vscale x 4 x i8> %offset, <vs
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -100,10 +99,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
 define <vscale x 4 x i32> @test_vloxei5(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
 ; CHECK-NEXT:    vsll.vi v10, v9, 12
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei16.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
@@ -123,10 +122,10 @@ define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -146,8 +145,9 @@ define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vsll.vi v12, v8, 2
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -172,10 +172,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vloxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vloxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
@@ -199,10 +199,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vluxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vluxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vluxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -227,10 +227,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vluxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vluxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
@@ -254,10 +254,10 @@ declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
 define void @test_vsoxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vsoxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsoxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -281,10 +281,10 @@ declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
 define void @test_vsoxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vsoxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
@@ -308,10 +308,10 @@ declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
 define void @test_vsuxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vsuxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsuxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -335,10 +335,10 @@ declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
 define void @test_vsuxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vsuxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index eb8c58d2d37790..073a0aecb8f732 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -184,9 +184,8 @@ define <vscale x 1 x i1> @icmp_uge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1
 define <vscale x 1 x i1> @icmp_uge_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
@@ -348,9 +347,8 @@ define <vscale x 1 x i1> @icmp_sge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1
 define <vscale x 1 x i1> @icmp_sge_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
@@ -470,9 +468,8 @@ define <vscale x 1 x i1> @icmp_sle_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vsca
 define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
@@ -764,9 +761,8 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
 define <vscale x 8 x i1> @icmp_uge_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
@@ -928,9 +924,8 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
 define <vscale x 8 x i1> @icmp_sge_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
@@ -1050,9 +1045,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vsca
 define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
@@ -1375,9 +1369,8 @@ define <vscale x 1 x i1> @icmp_uge_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x
 define <vscale x 1 x i1> @icmp_uge_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
@@ -1539,9 +1532,8 @@ define <vscale x 1 x i1> @icmp_sge_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x
 define <vscale x 1 x i1> @icmp_sge_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
@@ -1661,9 +1653,8 @@ define <vscale x 1 x i1> @icmp_sle_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <v
 define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
@@ -1885,9 +1876,8 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vmsleu.vv v12, v16, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
@@ -2064,9 +2054,8 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
@@ -2197,9 +2186,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
@@ -2633,9 +2621,8 @@ define <vscale x 1 x i1> @icmp_uge_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_uge_vx_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v9, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a0
 ; RV64-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -2881,9 +2868,8 @@ define <vscale x 1 x i1> @icmp_sge_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_sge_vx_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v9, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a0
 ; RV64-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -3073,9 +3059,8 @@ define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
 ;
 ; RV64-LABEL: icmp_sle_vx_swap_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v9, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a0
 ; RV64-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -3402,9 +3387,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_uge_vx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v24, a0
 ; RV64-NEXT:    vmsleu.vv v16, v24, v8, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
@@ -3671,9 +3655,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_sge_vx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v24, a0
 ; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
@@ -3879,9 +3862,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ;
 ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v24, a0
 ; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
index a4b7ca7f39768f..d276631c5883e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vdiv_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vdiv.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
index 67c3f9dbf2869a..7794e031d5c677 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vdivu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
-; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vdivu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
index 80ada4670562d7..d4930bd2ae0396 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -143,9 +143,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -170,9 +169,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -198,9 +196,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -225,9 +222,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
@@ -254,9 +250,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %v
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
@@ -283,9 +278,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
@@ -362,9 +356,8 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -389,9 +382,8 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -468,9 +460,8 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -495,9 +486,8 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -574,9 +564,8 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -601,9 +590,8 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -694,9 +682,8 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -721,9 +708,8 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
index c92a79e49c1642..94b80075ac14c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
@@ -120,9 +120,8 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -148,9 +147,8 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32_commute(<vscale x 1 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -177,9 +175,8 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -255,9 +252,8 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -283,9 +279,8 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32_commute(<vscale x 2 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -312,9 +307,8 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -392,9 +386,8 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -420,9 +413,8 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_commute(<vscale x 4 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -449,9 +441,8 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -529,9 +520,8 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -557,9 +547,8 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_commute(<vscale x 8 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -586,9 +575,8 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
index 6ea58a4e768736..3fc58acd504e52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
@@ -71,9 +71,8 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -101,9 +100,8 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32_commute(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -131,9 +129,8 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -212,9 +209,8 @@ define <vscale x 2 x float> @vfnmacc_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -242,9 +238,8 @@ define <vscale x 2 x float> @vfnmacc_vf_nxv2f32_commute(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -272,9 +267,8 @@ define <vscale x 2 x float> @vfnmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -355,9 +349,8 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -385,9 +378,8 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_commute(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -415,9 +407,8 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -498,9 +489,8 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -528,9 +518,8 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_commute(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -558,9 +547,8 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -655,9 +643,8 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32(<vscale x 16 x half> %a, half
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -685,9 +672,8 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_commute(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -715,9 +701,8 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
index 0afbe58038c76f..692a22bde48822 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
@@ -69,9 +69,8 @@ define <vscale x 1 x float> @vfnmsac_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -97,9 +96,8 @@ define <vscale x 1 x float> @vfnmsac_vf_nxv1f32_commute(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -126,9 +124,8 @@ define <vscale x 1 x float> @vfnmsac_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -204,9 +201,8 @@ define <vscale x 2 x float> @vfnmsac_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -232,9 +228,8 @@ define <vscale x 2 x float> @vfnmsac_vf_nxv2f32_commute(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -261,9 +256,8 @@ define <vscale x 2 x float> @vfnmsac_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -341,9 +335,8 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -369,9 +362,8 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_commute(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -398,9 +390,8 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -478,9 +469,8 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -506,9 +496,8 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_commute(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -535,9 +524,8 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -629,9 +617,8 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32(<vscale x 16 x half> %a, half
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -657,9 +644,8 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_commute(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -686,9 +672,8 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
index 2b3c5417b15b55..4f24da3d861afe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
@@ -36,9 +36,8 @@ declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
 define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vwadd.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9
 ; CHECK-NEXT:    vnsrl.wv v8, v10, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -63,10 +62,9 @@ declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
 define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
 ; CHECK-LABEL: vnclip:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vwadd.vv v10, v8, v9
 ; CHECK-NEXT:    csrwi vxrm, 0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclip.wv v8, v10, v12
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index f65e708f5303cc..cc02f54cda21a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vmax_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vmax.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index df1ad58e5ecbde..45dc63442f157f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vmaxu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
-; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index 0bf0638633aa45..f1edd99aa0b0dc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vmin_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vmin.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 2acebdf2e646d4..9929cc3b784556 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vminu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
-; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vminu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
index 51026cbcb8c4bf..0adee0157c8f38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -1440,11 +1440,10 @@ define <vscale x 8 x i64> @vmul_vadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va,
 ; CHECK-LABEL: vmul_vadd_vx_nxv8i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 21
-; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    li a1, 7
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmadd.vx v8, a1, v16
+; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    li a0, 7
+; CHECK-NEXT:    vmadd.vx v8, a0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 84c8321b5b9342..9d45b0cd078818 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -235,17 +235,17 @@ define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8
 define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
@@ -268,18 +268,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
 ; RV32-NEXT:    srli a3, a3, 2
 ; RV32-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vslidedown.vx v0, v0, a3
-; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v10
-; RV32-NEXT:    vsetvli zero, a4, e8, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
 ; RV32-NEXT:    bltu a1, a2, .LBB12_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a2
 ; RV32-NEXT:  .LBB12_2:
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -302,18 +302,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
 ; RV64-NEXT:    srli a4, a2, 3
 ; RV64-NEXT:    vsetvli a7, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v13, a4
-; RV64-NEXT:    vsetvli a7, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v11
-; RV64-NEXT:    vsetvli zero, a6, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
 ; RV64-NEXT:    bltu a5, a2, .LBB12_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a5, a2
 ; RV64-NEXT:  .LBB12_2:
-; RV64-NEXT:    vsetvli a6, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vmv1r.v v0, v13
-; RV64-NEXT:    vsetvli zero, a5, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
 ; RV64-NEXT:    bltu a1, a3, .LBB12_4
 ; RV64-NEXT:  # %bb.3:
@@ -325,18 +325,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
 ; RV64-NEXT:    and a3, a5, a3
 ; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v12, a4
-; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
 ; RV64-NEXT:    bltu a1, a2, .LBB12_6
 ; RV64-NEXT:  # %bb.5:
 ; RV64-NEXT:    mv a1, a2
 ; RV64-NEXT:  .LBB12_6:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
@@ -529,19 +529,19 @@ define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
@@ -552,19 +552,19 @@ define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x
 define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -576,17 +576,17 @@ define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vsca
 define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v10, v8, v8
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v10, v8, v8
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -598,18 +598,17 @@ define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vsca
 define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; RV32-NEXT:    vwadd.vv v12, v8, v8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vwadd.vv v12, v8, v8
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
@@ -755,19 +754,18 @@ define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
@@ -778,19 +776,18 @@ define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x
 define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -802,19 +799,19 @@ define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vsca
 define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v12, v10, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v12, v10, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -826,19 +823,18 @@ define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vsca
 define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
@@ -849,19 +845,18 @@ define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x
 define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -873,19 +868,17 @@ define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vsc
 define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v8, v12, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -897,18 +890,17 @@ define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vsc
 define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
@@ -1012,19 +1004,18 @@ define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
@@ -1035,19 +1026,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -1059,19 +1049,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vsca
 define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v16, v10, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v16, v10, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -1083,19 +1073,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vsca
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
@@ -1106,19 +1095,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1130,19 +1118,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v16, v12, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1154,18 +1142,17 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
@@ -1176,18 +1163,17 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1199,18 +1185,17 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1222,18 +1207,17 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v16, v8, 0
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
@@ -1531,19 +1515,19 @@ define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
@@ -1554,19 +1538,19 @@ define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x
 define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1578,17 +1562,17 @@ define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vsc
 define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v10, v8, v8
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v10, v8, v8
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1600,18 +1584,17 @@ define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vsc
 define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; RV32-NEXT:    vwadd.vv v12, v8, v8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vwadd.vv v12, v8, v8
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
@@ -1715,19 +1698,18 @@ define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale
 define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
@@ -1738,19 +1720,18 @@ define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale
 define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1762,19 +1743,19 @@ define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vs
 define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v12, v10, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v12, v10, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1786,19 +1767,18 @@ define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vs
 define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
@@ -1809,19 +1789,18 @@ define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale
 define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1833,19 +1812,17 @@ define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <v
 define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v8, v12, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1857,18 +1834,17 @@ define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <v
 define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
@@ -1972,19 +1948,18 @@ define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale
 define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
@@ -1995,19 +1970,18 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale
 define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -2019,19 +1993,19 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <v
 define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v16, v10, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v16, v10, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -2043,19 +2017,18 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <v
 define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
@@ -2066,19 +2039,18 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscal
 define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -2090,19 +2062,19 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v16, v12, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -2114,18 +2086,17 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
@@ -2136,18 +2107,17 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscal
 define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -2159,18 +2129,17 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -2182,18 +2151,17 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v16, v8, 0
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
@@ -2223,19 +2191,18 @@ define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale
 define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
@@ -2246,19 +2213,18 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale
 define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2270,19 +2236,19 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <v
 define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v16, v10, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v16, v10, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2294,19 +2260,18 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <v
 define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
@@ -2317,19 +2282,18 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscal
 define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2341,19 +2305,19 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v16, v12, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2365,18 +2329,17 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
@@ -2387,18 +2350,17 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscal
 define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2410,18 +2372,17 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2433,18 +2394,17 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v16, v8, 0
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 4d715c7031000e..a1b753e3135a10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -47,17 +47,15 @@ define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs,
 define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
@@ -68,21 +66,19 @@ define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscal
 define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
-; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
-; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
@@ -93,25 +89,23 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscal
 define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wi v11, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v11, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
-; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; RV64-NEXT:    vnsrl.wi v12, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v12, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
-; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
@@ -174,17 +168,17 @@ define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs,
 define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v9
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
@@ -231,17 +225,15 @@ define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs
 define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
@@ -252,21 +244,19 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vsca
 define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wi v11, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v11, 0
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; RV64-NEXT:    vnsrl.wi v12, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v12, 0
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
@@ -329,19 +319,19 @@ define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs
 define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
@@ -352,19 +342,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -376,17 +366,17 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v12, v10, v10
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v12, v10, v10
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -398,18 +388,17 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr
 define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; RV32-NEXT:    vwadd.vv v12, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vwadd.vv v12, v10, v10
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
@@ -430,9 +419,8 @@ define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsext.vf2 v12, v10, v0.t
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vwadd.vv v16, v12, v12
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
@@ -454,9 +442,8 @@ define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v10, v0.t
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vwadd.vv v16, v12, v12
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
@@ -471,10 +458,10 @@ define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12, v0.t
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v12, v16, 0
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -497,10 +484,10 @@ define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vzext.vf2 v16, v12, v0.t
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v12, v16, 0
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -556,17 +543,15 @@ define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs
 define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wi v11, v8, 0
 ; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wi v12, v8, 0
 ; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
@@ -629,19 +614,18 @@ define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs
 define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
@@ -652,19 +636,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -676,19 +659,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v14, v12
 ; RV32-NEXT:    vsll.vi v12, v14, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v14, v12
 ; RV64-NEXT:    vsll.vi v12, v14, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -700,19 +683,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
@@ -723,19 +705,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -747,19 +728,17 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v12, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -771,18 +750,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v12, v12, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsll.vi v12, v12, 2
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
@@ -881,19 +859,18 @@ define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs
 define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
@@ -904,19 +881,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -928,19 +904,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v18, v16
 ; RV32-NEXT:    vsll.vi v16, v18, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v18, v16
 ; RV64-NEXT:    vsll.vi v16, v18, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -952,19 +928,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
@@ -975,19 +950,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -999,19 +973,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v20, v16
 ; RV64-NEXT:    vsll.vi v16, v20, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1023,18 +997,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
@@ -1045,18 +1018,17 @@ define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1068,18 +1040,17 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1091,18 +1062,17 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v24, v16, 0
 ; RV32-NEXT:    vsll.vi v16, v24, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
@@ -1380,19 +1350,19 @@ define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptr
 define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
@@ -1403,19 +1373,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1427,17 +1397,17 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v12, v10, v10
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v12, v10, v10
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1449,18 +1419,17 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr
 define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; RV32-NEXT:    vwadd.vv v12, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vwadd.vv v12, v10, v10
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
@@ -1559,19 +1528,18 @@ define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %pt
 define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
@@ -1582,19 +1550,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %ba
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1606,19 +1573,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, pt
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v14, v12
 ; RV32-NEXT:    vsll.vi v12, v14, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v14, v12
 ; RV64-NEXT:    vsll.vi v12, v14, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1630,19 +1597,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, pt
 define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
@@ -1653,19 +1619,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %b
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1677,19 +1642,17 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, p
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v12, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1701,18 +1664,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, p
 define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v12, v12, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsll.vi v12, v12, 2
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
@@ -1811,19 +1773,18 @@ define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %p
 define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
@@ -1834,19 +1795,18 @@ define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %b
 define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -1858,19 +1818,19 @@ define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, p
 define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v18, v16
 ; RV32-NEXT:    vsll.vi v16, v18, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v18, v16
 ; RV64-NEXT:    vsll.vi v16, v18, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -1882,19 +1842,18 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, p
 define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
@@ -1905,19 +1864,18 @@ define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -1929,19 +1887,19 @@ define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v20, v16
 ; RV64-NEXT:    vsll.vi v16, v20, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -1953,18 +1911,17 @@ define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
@@ -1975,18 +1932,17 @@ define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -1998,18 +1954,17 @@ define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -2021,18 +1976,17 @@ define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v24, v16, 0
 ; RV32-NEXT:    vsll.vi v16, v24, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
@@ -2061,19 +2015,18 @@ define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %p
 define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
@@ -2084,19 +2037,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %b
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2108,19 +2060,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, p
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v18, v16
 ; RV32-NEXT:    vsll.vi v16, v18, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v18, v16
 ; RV64-NEXT:    vsll.vi v16, v18, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2132,19 +2084,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, p
 define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
@@ -2155,19 +2106,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2179,19 +2129,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v20, v16
 ; RV64-NEXT:    vsll.vi v16, v20, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2203,18 +2153,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
@@ -2225,18 +2174,17 @@ define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2248,18 +2196,17 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2271,18 +2218,17 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v24, v16, 0
 ; RV32-NEXT:    vsll.vi v16, v24, 3
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
index 2ef96f4b3896fc..9304e8c58f90db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vrem_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vrem.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
index 1f1ed4a1269acb..1d4ee06cb9ac85 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vremu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
-; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vremu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 027c81180d5f19..f28153d427ebfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -1125,7 +1125,6 @@ exit:
 define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v, i1 %cmp) {
 ; CHECK-LABEL: clobbered_forwarded_avl:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:  .LBB27_1: # %for.body
@@ -1133,9 +1132,7 @@ define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    bnez a1, .LBB27_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vadd.vv v10, v8, v8
-; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
index 380835494ed17d..f5c46aec86b864 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
@@ -9,10 +9,9 @@ declare <vscale x 8 x i7> @llvm.vp.shl.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i
 define <vscale x 8 x i7> @vsll_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: vsll_vx_nxv8i7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
 ; CHECK-NEXT:    vsll.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 382c8297473b78..68ea51845392c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -944,9 +944,9 @@ define <vscale x 1 x i8> @vsra_vv_nxv1i8_sext_zext_mixed_trunc(<vscale x 1 x i8>
 ; CHECK-NEXT:    vsext.vf4 v9, v8
 ; CHECK-NEXT:    vzext.vf4 v10, v8
 ; CHECK-NEXT:    vsra.vv v8, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %sexted_va = sext <vscale x 1 x i8> %va to <vscale x 1 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
index cff8cc710d21f3..ecce91982b14a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
@@ -12,10 +12,8 @@ define <vscale x 8 x i7> @vsra_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
 ; CHECK-NEXT:    vsra.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
index ff6771b643031f..9431b9abc91fbf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
@@ -12,9 +12,7 @@ define <vscale x 8 x i7> @vsrl_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
-; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vsrl.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
index 613b58b0f1b88a..bab66aad8c7e2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -62,9 +62,8 @@ define <vscale x 1 x i8> @vssub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale
 define <vscale x 1 x i8> @vssub_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vssub_vx_nxv1i8_commute:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vssub.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
index 8c729d7d9bfb6e..ca56145260f51c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -60,9 +60,8 @@ define <vscale x 1 x i8> @vssubu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale
 define <vscale x 1 x i8> @vssubu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vssubu_vx_nxv1i8_commute:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vssubu.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
index bb3076b3a945e8..c30c4763dd46d5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
@@ -13,10 +13,9 @@ declare <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64>, <vscale x 2
 define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -35,10 +34,9 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale
 define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -57,17 +55,15 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale
 define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV64-NEXT:    ret
 ;
@@ -94,12 +90,11 @@ define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <
 define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -120,12 +115,11 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32
 define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -146,12 +140,11 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32
 define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -172,12 +165,11 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16
 define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -198,12 +190,11 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16
 define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -224,12 +215,11 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b
 define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -250,9 +240,8 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
 define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -276,10 +265,9 @@ declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4
 define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv4i32_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -298,10 +286,9 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale
 define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv4i32_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -320,17 +307,15 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale
 define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV64-NEXT:    ret
 ;
@@ -358,9 +343,8 @@ define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <
 define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -380,12 +364,11 @@ define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <
 define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -406,12 +389,11 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16
 define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -432,12 +414,11 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16
 define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -458,12 +439,11 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b
 define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -484,9 +464,8 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
 define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -511,10 +490,9 @@ declare <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16>, <vscale x 8
 define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv8i16_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -533,10 +511,9 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x
 define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv8i16_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -555,17 +532,15 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x
 define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
 ; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV64-NEXT:    ret
 ;
@@ -593,9 +568,8 @@ define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <v
 define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -616,9 +590,8 @@ define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <v
 define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -638,12 +611,11 @@ define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <v
 define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -664,12 +636,11 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b,
 define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -690,9 +661,8 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b,
 define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
 ; CHECK-NEXT:    ret
 ;

From e38ced5a159edcd2a41743704b0f462567fe2529 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 13 Sep 2024 14:10:56 -0700
Subject: [PATCH 02/17] fixup! clang-format

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index ae23d5dda5eb04..63e32d8e870e27 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -39,7 +39,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
 public:
   static char ID;
 
-  RISCVVLOptimizer() : MachineFunctionPass(ID) { }
+  RISCVVLOptimizer() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 

>From 718404568829c47c63ef80daaa2da5b2189625bf Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Sep 2024 09:30:58 -0700
Subject: [PATCH 03/17] fixup! respond to some comments

---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp |   2 +-
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   | 110 ++++++++------
 llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll       | 144 +++++++------------
 3 files changed, 114 insertions(+), 142 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 9273b305f1b8bf..45aa113de30958 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -103,7 +103,7 @@ static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
     cl::desc("Insert vsetvls after vector register allocation"),
     cl::init(true));
 
-static cl::opt<bool> EnableVLOptimizer("riscv-enable-vloptimizer",
+static cl::opt<bool> EnableVLOptimizer("riscv-enable-vl-optimizer",
                                        cl::desc("Enable the VL Optimizer pass"),
                                        cl::init(true), cl::Hidden);
 
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 63e32d8e870e27..36910f818ea249 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -29,6 +29,7 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "riscv-vl-optimizer"
+#define PASS_NAME "RISC-V VL Optimizer"
 
 namespace {
 
@@ -49,7 +50,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
-  StringRef getPassName() const override { return "RISC-V VL Optimizer"; }
+  StringRef getPassName() const override { return PASS_NAME; }
 
 private:
   bool tryReduceVL(MachineInstr &MI);
@@ -59,11 +60,9 @@ class RISCVVLOptimizer : public MachineFunctionPass {
 } // end anonymous namespace
 
 char RISCVVLOptimizer::ID = 0;
-INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, "RISC-V VL Optimizer",
-                      false, false)
+INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(RISCVVLOptimizer, DEBUG_TYPE, "RISC-V VL Optimizer", false,
-                    false)
+INITIALIZE_PASS_END(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
 
 FunctionPass *llvm::createRISCVVLOptimizerPass() {
   return new RISCVVLOptimizer();
@@ -74,10 +73,7 @@ static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI) {
   if (R.isPhysical())
     return RISCV::VRRegClass.contains(R);
   const TargetRegisterClass *RC = MRI->getRegClass(R);
-  return RISCV::VRRegClass.hasSubClassEq(RC) ||
-         RISCV::VRM2RegClass.hasSubClassEq(RC) ||
-         RISCV::VRM4RegClass.hasSubClassEq(RC) ||
-         RISCV::VRM8RegClass.hasSubClassEq(RC);
+  return RISCVRI::isVRegClass(RC->TSFlags);
 }
 
 /// Represents the EMUL and EEW of a MachineOperand.
@@ -134,6 +130,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
   return OS;
 }
 
+namespace llvm {
+namespace RISCVVType {
 /// Return the RISCVII::VLMUL that is two times VLMul.
 /// Precondition: VLMul is not LMUL_RESERVED or LMUL_8.
 static RISCVII::VLMUL twoTimesVLMUL(RISCVII::VLMUL VLMul) {
@@ -198,6 +196,8 @@ getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) {
   Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
   return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
 }
+} // end namespace RISCVVType
+} // end namespace llvm
 
 static bool isOpN(const MachineOperand &MO, unsigned OpN) {
   const MachineInstr &MI = *MO.getParent();
@@ -229,7 +229,8 @@ static OperandInfo getIndexSegmentLoadStoreOperandInfo(unsigned Log2EEW,
   // v.*seg<nf>ei<eeew>.v
   // Index vector register group has EEW=<eew>, EMUL=(EEW/SEW)*LMUL.
   if (isOpN(MO, 2))
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI),
+                       Log2EEW);
 
   llvm_unreachable("Could not get OperandInfo for non-vector register of an "
                    "indexed segment load or store instruction");
@@ -251,7 +252,8 @@ static OperandInfo getIntegerExtensionOperandInfo(unsigned Factor,
   unsigned EEW = MISEW / Factor;
   unsigned Log2EEW = Log2_32(EEW);
 
-  return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW);
+  return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI),
+                     Log2EEW);
 }
 
 /// Check whether MO is a mask operand of MI.
@@ -296,7 +298,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
 
   // All mask operands have EEW=1, EMUL=(EEW/SEW)*LMUL
   if (isMaskOperand(MI, MO, MRI))
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
 
   // switch against BaseInstr to reduce number of cases that need to be
   // considered.
@@ -322,25 +324,25 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VLSE8_V:
   case RISCV::VSSE8_V:
   case RISCV::VLE8FF_V:
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
   case RISCV::VLE16_V:
   case RISCV::VSE16_V:
   case RISCV::VLSE16_V:
   case RISCV::VSSE16_V:
   case RISCV::VLE16FF_V:
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
   case RISCV::VLE32_V:
   case RISCV::VSE32_V:
   case RISCV::VLSE32_V:
   case RISCV::VSSE32_V:
   case RISCV::VLE32FF_V:
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
   case RISCV::VLE64_V:
   case RISCV::VSE64_V:
   case RISCV::VLSE64_V:
   case RISCV::VSSE64_V:
   case RISCV::VLE64FF_V:
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
 
   // 7.6. Vector Indexed Instructions
   // Data EEW=SEW, EMUL=LMUL. Index EEW=<eew> and EMUL=(EEW/SEW)*LMUL
@@ -350,28 +352,28 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSOXEI8_V:
     if (MO.getOperandNo() == 0)
       return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
   case RISCV::VLUXEI16_V:
   case RISCV::VLOXEI16_V:
   case RISCV::VSUXEI16_V:
   case RISCV::VSOXEI16_V:
     if (MO.getOperandNo() == 0)
       return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
   case RISCV::VLUXEI32_V:
   case RISCV::VLOXEI32_V:
   case RISCV::VSUXEI32_V:
   case RISCV::VSOXEI32_V:
     if (MO.getOperandNo() == 0)
       return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
   case RISCV::VLUXEI64_V:
   case RISCV::VLOXEI64_V:
   case RISCV::VSUXEI64_V:
   case RISCV::VSOXEI64_V:
     if (MO.getOperandNo() == 0)
       return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
 
   // 7.8. Vector Load/Store Segment Instructions
   // 7.8.1. Vector Unit-Stride Segment Loads and Stores
@@ -707,7 +709,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWSUB_VX:
   case RISCV::VWSLL_VI: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // Def and Op1 uses EEW=2*SEW and EMUL=2*LMUL. Op2 uses EEW=SEW and EMUL=LMUL
@@ -721,7 +724,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWSUB_WX: {
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 11.3. Vector Integer Extension
@@ -743,7 +747,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSBC_VVM:
   case RISCV::VSBC_VXM:
     return MO.getOperandNo() == 3
-               ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
+               ? OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI),
+                             0)
                : OperandInfo(MIVLMul, MILog2SEW);
   // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. Mask
   // operand EEW=1 and EMUL=(EEW/SEW)*LMUL
@@ -753,7 +758,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMSBC_VVM:
   case RISCV::VMSBC_VXM:
     return IsMODef || MO.getOperandNo() == 3
-               ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
+               ? OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI),
+                             0)
                : OperandInfo(MIVLMul, MILog2SEW);
   // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
   case RISCV::VMADC_VV:
@@ -761,7 +767,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMADC_VX:
   case RISCV::VMSBC_VV:
   case RISCV::VMSBC_VX:
-    return IsMODef ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
+    return IsMODef ? OperandInfo(
+                         RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
                    : OperandInfo(MIVLMul, MILog2SEW);
 
   // 11.5. Vector Bitwise Logical Instructions
@@ -798,7 +805,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VNSRA_WX: {
     bool TwoTimes = IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 11.8. Vector Integer Compare Instructions
@@ -824,7 +832,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMSGT_VI:
   case RISCV::VMSGT_VX:
     if (IsMODef)
-      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
     return OperandInfo(MIVLMul, MILog2SEW);
 
   // 11.9. Vector Integer Min/Max Instructions
@@ -872,7 +880,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWMULU_VV:
   case RISCV::VWMULU_VX: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 11.13. Vector Single-Width Integer Multiply-Add Instructions
@@ -902,7 +911,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     // due to SSA.
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 11.15. Vector Integer Merge Instructions
@@ -912,7 +922,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMERGE_VVM:
   case RISCV::VMERGE_VXM:
     if (MO.getOperandNo() == 3)
-      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
     return OperandInfo(MIVLMul, MILog2SEW);
 
   // 11.16. Vector Integer Move Instructions
@@ -948,7 +958,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSMUL_VV:
   case RISCV::VSMUL_VX: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 12.4. Vector Single-Width Scaling Shift Instructions
@@ -971,7 +982,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VNCLIP_WX: {
     bool TwoTimes = !IsMODef && IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 13. Vector Floating-Point Instructions
@@ -991,7 +1003,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VFWSUB_VV:
   case RISCV::VFWSUB_VF: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // Dest and Op1 EEW=2*SEW and EMUL=2*LMUL. Op2 EEW=SEW and EMUL=LMUL.
@@ -1001,7 +1014,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VFWSUB_WV: {
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
@@ -1055,7 +1069,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     // due to SSA.
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 13.8. Vector Floating-Point Square-Root Instruction
@@ -1103,7 +1118,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMFGT_VF:
   case RISCV::VMFGE_VF:
     if (IsMODef)
-      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
     return OperandInfo(MIVLMul, MILog2SEW);
 
   // 13.15. Vector Floating-Point Merge Instruction
@@ -1111,7 +1126,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   // (EEW/SEW)*LMUL.
   case RISCV::VFMERGE_VFM:
     if (IsOp3)
-      return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
     return OperandInfo(MIVLMul, MILog2SEW);
 
   // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
@@ -1124,7 +1139,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VFWCVT_F_X_V:
   case RISCV::VFWCVT_F_F_V: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -1138,7 +1154,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VFNCVT_F_F_W:
   case RISCV::VFNCVT_ROD_F_F_W: {
     unsigned Log2EEW = IsMODef ? MILog2SEW - 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? halfVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL = IsMODef ? RISCVVType::halfVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 14. Vector Reduction Operations
@@ -1164,7 +1180,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWREDSUM_VS:
   case RISCV::VWREDSUMU_VS: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
   // 14.3. Vector Single-Width Floating-Point Reduction Instructions
@@ -1180,7 +1197,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VFWREDOSUM_VS:
   case RISCV::VFWREDUSUM_VS: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
 
@@ -1203,7 +1221,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMSBF_M:
   case RISCV::VMSIF_M:
   case RISCV::VMSOF_M: {
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
   }
   // 15.8. Vector Iota Instruction
   // Dest and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=1 and EMUL(EEW/SEW)*LMUL.
@@ -1212,7 +1230,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     unsigned Log2EEW = IsDefOrOp1 ? 0 : MILog2SEW;
     if (IsDefOrOp1)
       return OperandInfo(MIVLMul, Log2EEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(MILog2SEW, MI), Log2EEW);
+    return OperandInfo(
+        RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(MILog2SEW, MI), Log2EEW);
   }
   // 15.9. Vector Element Index Instruction
   // Dest EEW=SEW EMUL=LMUL. Mask Operand EEW=1 and EMUL(EEW/SEW)*LMUL.
@@ -1220,7 +1239,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     unsigned Log2EEW = IsMODef ? MILog2SEW : 0;
     if (IsMODef)
       return OperandInfo(MIVLMul, Log2EEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI),
+                       Log2EEW);
   }
   // 16. Vector Permutation Instructions
   // 16.1. Integer Scalar Move Instructions
@@ -1255,7 +1275,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VRGATHEREI16_VV: {
     if (IsMODef || IsOp2)
       return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
   }
   // 16.5. Vector Compress Instruction
   // EMUL=LMUL. EEW=SEW.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index baecb7bb7d2483..1531b9a221f98a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -4349,10 +4349,9 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat(<vscale x 1 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4382,10 +4381,9 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat_commute(<vscale x 1 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4415,10 +4413,9 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat_unmasked(<vscale x 1 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4448,10 +4445,9 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4718,10 +4714,9 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat(<vscale x 1 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -4749,10 +4744,9 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat_commute(<vscale x 1 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -4780,10 +4774,9 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat_unmasked(<vscale x 1 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -4811,10 +4804,9 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5266,10 +5258,9 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat(<vscale x 2 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5299,10 +5290,9 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat_commute(<vscale x 2 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5332,10 +5322,9 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat_unmasked(<vscale x 2 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5365,10 +5354,9 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5635,10 +5623,9 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat(<vscale x 2 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5666,10 +5653,9 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat_commute(<vscale x 2 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5697,10 +5683,9 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat_unmasked(<vscale x 2 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5728,10 +5713,9 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -6183,10 +6167,9 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6216,10 +6199,9 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6249,10 +6231,9 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6282,10 +6263,9 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6552,10 +6532,9 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -6583,10 +6562,9 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -6614,10 +6592,9 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -6645,10 +6622,9 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -7100,10 +7076,9 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7133,10 +7108,9 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7166,10 +7140,9 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7199,10 +7172,9 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7469,10 +7441,9 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -7500,10 +7471,9 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -7531,10 +7501,9 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -7562,10 +7531,9 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -8111,10 +8079,9 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat(<vscale x 16 x half>
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v4, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8151,10 +8118,9 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_commute(<vscale x 16
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vmv4r.v v4, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8184,10 +8150,9 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8217,10 +8182,9 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8531,10 +8495,9 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat(<vscale x 16 x half>
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vmv4r.v v4, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -8564,10 +8527,9 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_commute(<vscale x 16
 ; ZVFHMIN-NEXT:    vmv4r.v v20, v12
 ; ZVFHMIN-NEXT:    vmv4r.v v4, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -8595,10 +8557,9 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -8626,10 +8587,9 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -10532,10 +10492,9 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
@@ -10660,10 +10619,9 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
@@ -10794,10 +10752,9 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v24, v24, a1
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
@@ -10920,10 +10877,9 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
@@ -12069,7 +12025,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
@@ -12077,7 +12033,6 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a1, 1
@@ -12199,12 +12154,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v24, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a1, 1
@@ -12350,10 +12304,9 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
@@ -12472,10 +12425,9 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
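
The getOperandInfo hunks earlier in this patch keep applying two rules: widening forms use EEW = 2*SEW with EMUL = 2*LMUL, and mask operands use EEW = 1 with EMUL = (EEW/SEW)*LMUL. A rough, standalone sketch of that second computation in log2 space (the function name and encoding below are invented for illustration and are not the pass's RISCVVType helpers):

// Illustrative only: EMUL = (EEW / SEW) * LMUL, kept as a
// (power-of-two, is-fractional) pair like the pass's OperandInfo.
#include <cassert>
#include <cstdio>
#include <utility>

static std::pair<unsigned, bool> emulForEEW(unsigned Log2EEW, unsigned Log2SEW,
                                            int Log2LMUL) {
  // Everything is a power of two, so the computation is an exponent sum:
  // log2(EMUL) = log2(EEW) - log2(SEW) + log2(LMUL).
  int Log2EMUL = (int)Log2EEW - (int)Log2SEW + Log2LMUL;
  return Log2EMUL >= 0 ? std::make_pair(1u << Log2EMUL, false)
                       : std::make_pair(1u << -Log2EMUL, true);
}

int main() {
  // Mask produced by a compare at SEW=32 (log2 = 5) under LMUL=m2 (log2 = 1):
  // EMUL = (1/32) * 2 = 1/16, i.e. the fractional "mf16".
  auto [Val, Fractional] = emulForEEW(/*Log2EEW=*/0, /*Log2SEW=*/5,
                                      /*Log2LMUL=*/1);
  assert(Fractional && Val == 16);
  std::printf("EMUL = %s%u\n", Fractional ? "mf" : "m", Val);
  return 0;
}

At SEW=64 under LMUL=m1 the same formula yields 1/64, which has no RISCVII::VLMUL encoding at all; that is why OperandInfo stores EMUL as a pair instead of a VLMUL value.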

>From 2c32b3e69f1d56e266ab102a07ef6b9e5ab985bd Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Sep 2024 12:33:20 -0700
Subject: [PATCH 04/17] fixup! respond to some more comments

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 36910f818ea249..6dc6d1c585efbe 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1366,29 +1366,31 @@ static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) {
   }
 }
 
-static bool safeToPropgateVL(const MachineInstr &MI) {
+/// Return true if MI may read elements past VL.
+static bool mayReadPastVL(const MachineInstr &MI) {
   const RISCVVPseudosTable::PseudoInfo *RVV =
       RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
   if (!RVV)
-    return false;
+    return true;
 
   switch (RVV->BaseInstr) {
-  // vslidedown instructions may use the higher part of the input operand beyond
-  // the VL.
+  // vslidedown instructions may read elements past VL. They are handled
+  // according to current tail policy.
   case RISCV::VSLIDEDOWN_VI:
   case RISCV::VSLIDEDOWN_VX:
   case RISCV::VSLIDE1DOWN_VX:
   case RISCV::VFSLIDE1DOWN_VF:
 
-  // vrgather instructions may index beyond the VL.
+  // vrgather instructions may read the source vector at any index < VLMAX,
+  // regardless of VL.
   case RISCV::VRGATHER_VI:
   case RISCV::VRGATHER_VV:
   case RISCV::VRGATHER_VX:
   case RISCV::VRGATHEREI16_VV:
-    return false;
+    return true;
 
   default:
-    return true;
+    return false;
   }
 }
 
@@ -1443,6 +1445,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
   while (!Worklist.empty()) {
     MachineInstr &MI = *Worklist.pop_back_val();
     LLVM_DEBUG(dbgs() << "Try reduce VL for " << MI << "\n");
+
     std::optional<Register> CommonVL;
     bool CanReduceVL = true;
     for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
@@ -1461,7 +1464,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
         continue;
       }
 
-      if (!safeToPropgateVL(UserMI)) {
+      if (mayReadPastVL(UserMI)) {
         LLVM_DEBUG(dbgs() << "    Abort due to used by unsafe instruction\n");
         CanReduceVL = false;
         break;
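
The mayReadPastVL change above is the pass's central safety check: if any user may read source elements at or past its own VL, the VL of the producer must not be shrunk. A minimal scalar model of one such user, vslidedown.vi with offset 1 (illustrative only; the element values and VLMAX are made up, and mask/tail policy is ignored):

// Illustrative only: vslidedown.vi vd, vs2, 1 with VL = 2 computes
// vd[i] = vs2[i + 1] for i < VL, so it reads vs2[2] even though 2 >= VL.
// If the instruction producing vs2 had its VL reduced to 2, vs2[2] would no
// longer be a defined element and vd[1] could change.
#include <array>
#include <cstdio>

int main() {
  std::array<int, 4> Vs2 = {10, 11, 12, 13}; // pretend VLMAX = 4
  std::array<int, 4> Vd = {0, 0, 0, 0};
  unsigned VL = 2, Offset = 1;
  for (unsigned I = 0; I < VL; ++I)
    Vd[I] = Vs2[I + Offset]; // reads Vs2[1] and Vs2[2]; index 2 is past VL
  std::printf("%d %d\n", Vd[0], Vd[1]); // 11 12
  return 0;
}

vrgather is rejected for the same reason: its indices can address any source element below VLMAX regardless of VL.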

>From ebc78ebf288e443becbc2156d0b11964a19ea3d3 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 18 Sep 2024 11:00:04 -0700
Subject: [PATCH 05/17] fixup! fix getOperandInfo for reductions

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 42 ++++++++++------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 6dc6d1c585efbe..1262cac1365c9a 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1159,11 +1159,12 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   }
   // 14. Vector Reduction Operations
   // 14.1. Vector Single-Width Integer Reduction Instructions
-  // We need to return Unknown since only element 0 of reduction is valid but it
-  // was generated by reducing over all of the input elements. There are 3
-  // vector sources for reductions. One for scalar, one for tail value, and one
-  // for the elements to reduce over. Only the one with the elements to reduce
-  // over obeys VL. The other two only read element 0 from the register.
+  // The destination and first source operand are vector registers, but only
+  // element 0 has any meaning. The vector registers are being used as a scalar
+  // register. They have a width (SEW), but the concept of LMUL or EMUL does not
+  // apply here. We return Unknown since OperandInfo does not model this yet.
+  // The next vector operand is used for tail value, which does not obey VL. The
+  // last vector operand is the elements to reduce over, which does obey VL.
   case RISCV::VREDAND_VS:
   case RISCV::VREDMAX_VS:
   case RISCV::VREDMAXU_VS:
@@ -1172,34 +1173,29 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VREDOR_VS:
   case RISCV::VREDSUM_VS:
   case RISCV::VREDXOR_VS:
-    return OperandInfo(OperandInfo::State::Unknown);
-
   // 14.2. Vector Widening Integer Reduction Instructions
-  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW EMUL=LMUL. Source is zero
-  // extended to 2*SEW in order to generate 2*SEW Dest.
+  // Again, the destination and first source operand are vector registers, but
+  // only element 0 has any meaning. The vector registers are being used as a
+  // scalar register. They have a width (2 * SEW), but the concept of LMUL or
+  // EMUL does not apply here. We return Unknown since OperandInfo does not
+  // model this yet. The next vector operand is used for tail value, which does
+  // not obey VL. The last vector operand is the elements to reduce over,
+  // which does obey VL.
   case RISCV::VWREDSUM_VS:
-  case RISCV::VWREDSUMU_VS: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
+  case RISCV::VWREDSUMU_VS:
   // 14.3. Vector Single-Width Floating-Point Reduction Instructions
-  // EMUL=LMUL. EEW=SEW.
+  // Behaves similarly to 14.1
   case RISCV::VFREDMAX_VS:
   case RISCV::VFREDMIN_VS:
   case RISCV::VFREDOSUM_VS:
   case RISCV::VFREDUSUM_VS:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
   // 14.4. Vector Widening Floating-Point Reduction Instructions
-  // Source EEW=SEW and EMUL=LMUL. Dest EEW=2*SEW and EMUL=2*LMUL.
+  // Behaves similarly to 14.2
   case RISCV::VFWREDOSUM_VS:
   case RISCV::VFWREDUSUM_VS: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
+    if (IsOp2)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(OperandInfo::State::Unknown);
   }
 
   // 15. Vector Mask Instructions
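
The rewritten reduction comments above rest on one property of the reduction pseudos: only element 0 of the scalar source and of the destination carries a value, while the vs2 operand is read up to VL. A scalar model of vredsum.vs under those assumptions (unmasked; element values are made up):

// Illustrative only: vredsum.vs vd, vs2, vs1 with VL elements computes
// vd[0] = vs1[0] + vs2[0] + ... + vs2[VL-1].  Only element 0 of vs1 and vd
// matters, so the pass can treat them as scalar uses of a vector register,
// while vs2 obeys VL with EEW=SEW and EMUL=LMUL.
#include <array>
#include <cstdio>

int main() {
  std::array<int, 4> Vs2 = {1, 2, 3, 4};
  std::array<int, 4> Vs1 = {100, -1, -1, -1}; // only Vs1[0] is read
  unsigned VL = 3;
  int Acc = Vs1[0];
  for (unsigned I = 0; I < VL; ++I)
    Acc += Vs2[I];
  std::printf("vd[0] = %d\n", Acc); // 100 + 1 + 2 + 3 = 106
  return 0;
}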

>From f21571e715ec824a7ddea71868ca8ab6692a9454 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 18 Sep 2024 11:59:51 -0700
Subject: [PATCH 06/17] fixup! model scalar OperandInfo

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 73 ++++++++++++++++------
 1 file changed, 55 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 1262cac1365c9a..b07bdff348c200 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -86,16 +86,22 @@ struct OperandInfo {
   // Represent as 1,2,4,8, ... and fractional indicator. This is because
   // EMUL can take on values that don't map to RISCVII::VLMUL values exactly.
   // For example, a mask operand can have an EMUL less than MF8.
-  std::pair<unsigned, bool> EMUL;
+  std::optional<std::pair<unsigned, bool>> EMUL;
 
   unsigned Log2EEW;
 
+  // True if this is a vector register of which only element 0 is used.
+  bool IsScalar;
+
   OperandInfo(RISCVII::VLMUL EMUL, unsigned Log2EEW)
-      : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {
-  }
+      : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW),
+        IsScalar(false) {}
 
   OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
-      : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {}
+      : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW), IsScalar(false) {}
+
+  OperandInfo(unsigned Log2EEW)
+      : S(State::Known), EMUL(std::nullopt), Log2EEW(Log2EEW), IsScalar(true) {}
 
   OperandInfo(State S) : S(S) {
     assert(S != State::Known &&
@@ -108,8 +114,14 @@ struct OperandInfo {
 
   static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
     assert(A.isKnown() && B.isKnown() && "Both operands must be known");
-    return A.Log2EEW == B.Log2EEW && A.EMUL.first == B.EMUL.first &&
-           A.EMUL.second == B.EMUL.second;
+
+    if (A.IsScalar != B.IsScalar)
+      return false;
+    if (A.IsScalar && B.IsScalar)
+      return A.Log2EEW == B.Log2EEW;
+
+    return A.Log2EEW == B.Log2EEW && A.EMUL->first == B.EMUL->first &&
+           A.EMUL->second == B.EMUL->second;
   }
 
   void print(raw_ostream &OS) const {
@@ -117,10 +129,15 @@ struct OperandInfo {
       OS << "Unknown";
       return;
     }
+    if (IsScalar) {
+      OS << "EEW:" << (1 << Log2EEW);
+      return;
+    }
+    assert(EMUL && "Expected EMUL to have value");
     OS << "EMUL: ";
-    if (EMUL.second)
+    if (EMUL->second)
       OS << "m";
-    OS << "f" << EMUL.first;
+    OS << "f" << EMUL->first;
     OS << ", EEW: " << (1 << Log2EEW);
   }
 };
@@ -1162,9 +1179,9 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   // The destination and first source operand are vector registers, but only
   // element 0 has any meaning. The vector registers are being used as a scalar
   // register. They have a width (SEW), but the concept of LMUL or EMUL does not
-  // apply here. We return Unknown since OperandInfo does not model this yet.
-  // The next vector operand is used for tail value, which does not obey VL. The
-  // last vector operand is the elements to reduce over, which does obey VL.
+  // apply here. The next vector operand is used for tail value, which does not
+  // obey VL, so we must return Unknown. The last vector operand is the
+  // elements to reduce over, which does obey VL and has EEW=SEW and EMUL=LMUL.
   case RISCV::VREDAND_VS:
   case RISCV::VREDMAX_VS:
   case RISCV::VREDMAXU_VS:
@@ -1172,27 +1189,47 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VREDMINU_VS:
   case RISCV::VREDOR_VS:
   case RISCV::VREDSUM_VS:
-  case RISCV::VREDXOR_VS:
+  case RISCV::VREDXOR_VS: {
+    if (IsMODef || IsOp1)
+      return OperandInfo(MILog2SEW);
+    if (IsOp2)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(OperandInfo::State::Unknown);
+  }
   // 14.2. Vector Widening Integer Reduction Instructions
   // Again, the destination and first source operand are vector registers, but
   // only element 0 has any meaning. The vector registers are being used as a
   // scalar register. They have a width (2 * SEW), but the concept of LMUL or
-  // EMUL does not apply here. We return Unknown since OperandInfo does not
-  // model this yet. The next vector operand is used for tail value, which does
-  // not obey VL. The last vector operand is the elements to reduce over,
-  // which does obey VL.
+  // EMUL does not apply here. The next vector operand is used for tail value,
+  // which does not obey VL, so we must return Unknown. The last vector operand
+  // is the elements to reduce over, which does obey VL and has EEW=SEW and
+  // EMUL=LMUL.
   case RISCV::VWREDSUM_VS:
-  case RISCV::VWREDSUMU_VS:
+  case RISCV::VWREDSUMU_VS: {
+    if (IsMODef || IsOp1)
+      return OperandInfo(MILog2SEW + 1);
+    if (IsOp2)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(OperandInfo::State::Unknown);
+  }
   // 14.3. Vector Single-Width Floating-Point Reduction Instructions
   // Behaves similarly to 14.1
   case RISCV::VFREDMAX_VS:
   case RISCV::VFREDMIN_VS:
   case RISCV::VFREDOSUM_VS:
-  case RISCV::VFREDUSUM_VS:
+  case RISCV::VFREDUSUM_VS: {
+    if (IsMODef || IsOp1)
+      return OperandInfo(MILog2SEW);
+    if (IsOp2)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return OperandInfo(OperandInfo::State::Unknown);
+  }
   // 14.4. Vector Widening Floating-Point Reduction Instructions
   // Behaves similarly to 14.2
   case RISCV::VFWREDOSUM_VS:
   case RISCV::VFWREDUSUM_VS: {
+    if (IsMODef || IsOp1)
+      return OperandInfo(MILog2SEW + 1);
     if (IsOp2)
       return OperandInfo(MIVLMul, MILog2SEW);
     return OperandInfo(OperandInfo::State::Unknown);
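
With the IsScalar flag introduced above, two operand descriptions should compare equal when both are scalar uses with matching EEW, or both are ordinary vector uses with matching EEW and EMUL. A standalone mirror of that comparison (the struct below is a simplified stand-in, not the patch's OperandInfo):

// Illustrative only: a stripped-down version of EMULAndEEWAreEqual.
#include <cassert>
#include <optional>
#include <utility>

struct OpInfo {
  std::optional<std::pair<unsigned, bool>> EMUL; // (power of two, fractional)
  unsigned Log2EEW;
  bool IsScalar;
};

static bool sameEMULAndEEW(const OpInfo &A, const OpInfo &B) {
  if (A.IsScalar != B.IsScalar)
    return false;
  if (A.IsScalar) // both scalar: only the element width has to match
    return A.Log2EEW == B.Log2EEW;
  return A.Log2EEW == B.Log2EEW && A.EMUL == B.EMUL;
}

int main() {
  OpInfo ScalarSew32{std::nullopt, 5, true};                 // reduction vd/vs1 use
  OpInfo VectorM1Sew32{std::make_pair(1u, false), 5, false}; // EMUL=m1, EEW=32
  assert(sameEMULAndEEW(ScalarSew32, ScalarSew32));
  assert(!sameEMULAndEEW(ScalarSew32, VectorM1Sew32));
  return 0;
}

Leaving EMUL as std::nullopt for the scalar case avoids inventing an EMUL that has no meaning, which is the same reason the print() hunk above emits only the EEW when IsScalar is set.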

>From 397adf51308e5fe68460702d66e77a268b35766e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 18 Sep 2024 12:38:20 -0700
Subject: [PATCH 07/17] fixup! fix some comments, add some fixmes, and update
 check

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 198 +++++++++++----------
 1 file changed, 104 insertions(+), 94 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index b07bdff348c200..c69ea31d8481ae 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -53,6 +53,8 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
+  void checkUsers(std::optional<Register> &CommonVL, bool &CanReduceVL,
+                  MachineInstr &MI);
   bool tryReduceVL(MachineInstr &MI);
   bool isCandidate(const MachineInstr &MI) const;
 };
@@ -1428,27 +1430,16 @@ static bool mayReadPastVL(const MachineInstr &MI) {
 }
 
 bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
-
-  LLVM_DEBUG(
-      dbgs() << "Check whether the instruction is a candidate for reducing VL:"
-             << MI << "\n");
-
   const MCInstrDesc &Desc = MI.getDesc();
-  if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
-    LLVM_DEBUG(dbgs() << "  Not a candidate due to lack of vl op or sew op\n");
+  if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
     return false;
-  }
-
-  if (MI.getNumDefs() != 1) {
-    LLVM_DEBUG(dbgs() << " Not a candidate due to it def more than one\n");
+  if (MI.getNumDefs() != 1)
     return false;
-  }
+
   unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
   const MachineOperand &VLOp = MI.getOperand(VLOpNum);
-  if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel) {
-    LLVM_DEBUG(dbgs() << "  Not a candidate due to VL is not VLMAX\n");
+  if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel)
     return false;
-  }
 
   // Some instructions that produce vectors have semantics that make it more
   // difficult to determine whether the VL can be reduced. For example, some
@@ -1467,9 +1458,91 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
     return false;
   }
 
+  LLVM_DEBUG(dbgs() << "  Found a candidate for VL reduction: " << MI << "\n");
   return true;
 }
 
+void RISCVVLOptimizer::checkUsers(std::optional<Register> &CommonVL,
+                                  bool &CanReduceVL, MachineInstr &MI) {
+  // FIXME: Avoid visiting each user for each time we visit something on the
+  // worklist, combined with an extra visit from the outer loop. Restructure
+  // along lines of an instcombine style worklist which integrates the outer
+  // pass.
+  for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
+    const MachineInstr &UserMI = *UserOp.getParent();
+    LLVM_DEBUG(dbgs() << "  Checking user: " << UserMI << "\n");
+
+    // Instructions like reductions may use a vector register as a scalar
+    // register. In this case, we should treat it like a scalar register which
+    // does not impact the decision on whether to optimize VL.
+    if (isVectorOpUsedAsScalarOp(UserOp)) {
+      [[maybe_unused]] Register R = UserOp.getReg();
+      [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
+      assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
+             "Expect LMUL 1 register class for vector as scalar operands!");
+      LLVM_DEBUG(dbgs() << "    Use this operand as a scalar operand\n");
+      continue;
+    }
+
+    if (mayReadPastVL(UserMI)) {
+      LLVM_DEBUG(dbgs() << "    Abort because used by unsafe instruction\n");
+      CanReduceVL = false;
+      break;
+    }
+
+    // Tied operands might pass through.
+    if (UserOp.isTied()) {
+      LLVM_DEBUG(dbgs() << "    Abort because user used as tied operand\n");
+      CanReduceVL = false;
+      break;
+    }
+
+    const MCInstrDesc &Desc = UserMI.getDesc();
+    if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
+      LLVM_DEBUG(dbgs() << "    Abort due to lack of VL or SEW, assume that"
+                           " use VLMAX\n");
+      CanReduceVL = false;
+      break;
+    }
+
+    unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+    const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+    // Looking for a register VL that isn't X0.
+    if (!VLOp.isReg() || VLOp.getReg() == RISCV::X0) {
+      LLVM_DEBUG(dbgs() << "    Abort due to user uses X0 as VL.\n");
+      CanReduceVL = false;
+      break;
+    }
+
+    if (!CommonVL) {
+      CommonVL = VLOp.getReg();
+    } else if (*CommonVL != VLOp.getReg()) {
+      LLVM_DEBUG(dbgs() << "    Abort because users have different VL\n");
+      CanReduceVL = false;
+      break;
+    }
+
+    // The SEW and LMUL of destination and source registers need to match.
+
+    // We know that MI DEF is a vector register, because that was the guard
+    // to call this function.
+    assert(isVectorRegClass(UserMI.getOperand(0).getReg(), MRI) &&
+           "Expected DEF and USE to be vector registers");
+
+    OperandInfo ConsumerInfo = getOperandInfo(UserMI, UserOp, MRI);
+    OperandInfo ProducerInfo = getOperandInfo(MI, MI.getOperand(0), MRI);
+    if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() ||
+        !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) {
+      LLVM_DEBUG(dbgs() << "    Abort due to incompatible or unknown "
+                           "information for EMUL or EEW.\n");
+      LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
+      LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
+      CanReduceVL = false;
+      break;
+    }
+  }
+}
+
 bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
   SetVector<MachineInstr *> Worklist;
   Worklist.insert(&OrigMI);
@@ -1477,87 +1550,24 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
   bool MadeChange = false;
   while (!Worklist.empty()) {
     MachineInstr &MI = *Worklist.pop_back_val();
-    LLVM_DEBUG(dbgs() << "Try reduce VL for " << MI << "\n");
-
+    LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
+
+    // A MI may not produce a vector register when it is at the root of the
+    // traversal. For example:
+    // vec_reg_a = ..., vl_op, sew_op
+    // vec_reg_b = ..., vl_op, sew_op
+    // scalar_reg = vector_instr vec_reg_a, vec_reg_b, vl_op, sew_op
+    // We'd like to reduce the vl_op on vector_instr, despite it producing
+    // a scalar register. If the produced Dest is not a vector register (such as
+    // vcpop or vfirst), then it has no EEW or EMUL, so there is no need to
+    // check that producer and consumer LMUL and SEW of users match.
     std::optional<Register> CommonVL;
     bool CanReduceVL = true;
-    for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
-      const MachineInstr &UserMI = *UserOp.getParent();
-      LLVM_DEBUG(dbgs() << "  Check user: " << UserMI << "\n");
-
-      // Instructions like reductions may use a vector register as a scalar
-      // register. In this case, we should treat it like a scalar register which
-      // does not impact the decision on whether to optimize VL.
-      if (isVectorOpUsedAsScalarOp(UserOp)) {
-        [[maybe_unused]] Register R = UserOp.getReg();
-        [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
-        assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
-               "Expect LMUL 1 register class for vector as scalar operands!");
-        LLVM_DEBUG(dbgs() << "    Use this operand as a scalar operand\n");
-        continue;
-      }
-
-      if (mayReadPastVL(UserMI)) {
-        LLVM_DEBUG(dbgs() << "    Abort due to used by unsafe instruction\n");
-        CanReduceVL = false;
-        break;
-      }
-
-      // Tied operands might pass through.
-      if (UserOp.isTied()) {
-        LLVM_DEBUG(dbgs() << "    Abort due to user use it as tied operand\n");
-        CanReduceVL = false;
-        break;
-      }
-
-      const MCInstrDesc &Desc = UserMI.getDesc();
-      if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
-        LLVM_DEBUG(dbgs() << "    Abort due to lack of VL or SEW, assume that"
-                             " use VLMAX.\n");
-        CanReduceVL = false;
-        break;
-      }
-
-      unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
-      const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
-      // Looking for a register VL that isn't X0.
-      if (!VLOp.isReg() || VLOp.getReg() == RISCV::X0) {
-        LLVM_DEBUG(dbgs() << "    Abort due to user use X0 as VL.\n");
-        CanReduceVL = false;
-        break;
-      }
-
-      if (!CommonVL) {
-        CommonVL = VLOp.getReg();
-      } else if (*CommonVL != VLOp.getReg()) {
-        LLVM_DEBUG(dbgs() << "    Abort due to users have different VL!\n");
-        CanReduceVL = false;
-        break;
-      }
-
-      // The SEW and LMUL of destination and source registers need to match.
-
-      // If the produced Dest is not a vector register, then it has no EEW or
-      // EMUL, so there is no need to check that producer and consumer LMUL and
-      // SEW match. We've already checked above that UserOp is a vector
-      // register.
-      if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI)) {
-        LLVM_DEBUG(dbgs() << "    Abort due to register class mismatch between "
-                             "USE and DEF\n");
-        continue;
-      }
-
-      OperandInfo ConsumerInfo = getOperandInfo(UserMI, UserOp, MRI);
-      OperandInfo ProducerInfo = getOperandInfo(MI, MI.getOperand(0), MRI);
-      if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() ||
-          !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) {
-        LLVM_DEBUG(dbgs() << "    Abort due to incompatible or unknown "
-                             "information for EMUL or EEW.\n");
-        LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
-        LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
-        CanReduceVL = false;
-        break;
-      }
+    if (isVectorRegClass(MI.getOperand(0).getReg(), MRI))
+      checkUsers(CommonVL, CanReduceVL, MI);
+    else {
+      CommonVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())).getReg();
+      CanReduceVL = true;
     }
 
     if (!CanReduceVL || !CommonVL)

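For readers following the checkUsers logic above: the pattern it targets is a producer whose VL operand is VLMAX (-1) feeding users that all agree on a smaller VL. Below is a minimal IR sketch of that case, written in the same intrinsic style as the tests added later in this series (iXLen is sed-substituted to i32/i64 there); the function name is illustrative only and this snippet is not part of the patch.

define <vscale x 4 x i32> @reduce_vl_sketch(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
  ; Producer runs at VLMAX (VL operand of -1).
  %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  ; Sole user has matching EEW/EMUL and a VL operand of %vl, so CommonVL is %vl
  ; and the producer's VL can be reduced to %vl.
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
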
>From 888fe3114b6d88cd62b1ccb6dc853746a6ad1a4e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 18 Sep 2024 14:09:25 -0700
Subject: [PATCH 08/17] fixup! don't change to same vl

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index c69ea31d8481ae..65231f91242abe 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1565,10 +1565,6 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
     bool CanReduceVL = true;
     if (isVectorRegClass(MI.getOperand(0).getReg(), MRI))
       checkUsers(CommonVL, CanReduceVL, MI);
-    else {
-      CommonVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())).getReg();
-      CanReduceVL = true;
-    }
 
     if (!CanReduceVL || !CommonVL)
       continue;

>From 5142e12ea2d4461213f06b5253e3827c9de6b987 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 18 Sep 2024 14:22:45 -0700
Subject: [PATCH 09/17] fixup! remove comment and return CanReduceVL

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 65231f91242abe..a948e8f22332a8 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -53,8 +53,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
-  void checkUsers(std::optional<Register> &CommonVL, bool &CanReduceVL,
-                  MachineInstr &MI);
+  bool checkUsers(std::optional<Register> &CommonVL, MachineInstr &MI);
   bool tryReduceVL(MachineInstr &MI);
   bool isCandidate(const MachineInstr &MI) const;
 };
@@ -1462,12 +1461,13 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
-void RISCVVLOptimizer::checkUsers(std::optional<Register> &CommonVL,
-                                  bool &CanReduceVL, MachineInstr &MI) {
+bool RISCVVLOptimizer::checkUsers(std::optional<Register> &CommonVL,
+                                  MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
   // worklist, combined with an extra visit from the outer loop. Restructure
   // along lines of an instcombine style worklist which integrates the outer
   // pass.
+  bool CanReduceVL = true;
   for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
     const MachineInstr &UserMI = *UserOp.getParent();
     LLVM_DEBUG(dbgs() << "  Checking user: " << UserMI << "\n");
@@ -1541,6 +1541,7 @@ void RISCVVLOptimizer::checkUsers(std::optional<Register> &CommonVL,
       break;
     }
   }
+  return CanReduceVL;
 }
 
 bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
@@ -1552,19 +1553,10 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
     MachineInstr &MI = *Worklist.pop_back_val();
     LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
 
-    // An MI may not produce a vector register when it is at the root of the
-    // traversal. For example:
-    // vec_reg_a = ..., vl_op, sew_op
-    // vec_reg_b = ..., vl_op, sew_op
-    // scalar_reg = vector_instr vec_reg_a, vec_reg_b, vl_op, sew_op
-    // We'd like to reduce the vl_op on vector_instr, despite it producing
-    // a scalar register. If the produced Dest is not a vector register (as for
-    // vcpop or vfirst), then it has no EEW or EMUL, so there is no need to
-    // check that the LMUL and SEW of the producer and its users match.
     std::optional<Register> CommonVL;
     bool CanReduceVL = true;
     if (isVectorRegClass(MI.getOperand(0).getReg(), MRI))
-      checkUsers(CommonVL, CanReduceVL, MI);
+      CanReduceVL = checkUsers(CommonVL, MI);
 
     if (!CanReduceVL || !CommonVL)
       continue;

>From 43cd9a499bfc1a20a90d6a39e16b155435d493ca Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Sep 2024 10:05:49 -0700
Subject: [PATCH 10/17] fixup! add basic tests for supported instrs

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 395 +++++++++++++++++++
 1 file changed, 395 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
new file mode 100644
index 00000000000000..4fd02bd649577d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+
+; The purpose of this test is to check the behavior of specific instructions as it relates to the VL optimizer
+
+define <vscale x 4 x i32> @vadd_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vadd_vi:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vi v10, v8, 5
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vadd_vi:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vi v10, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vadd_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vadd_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vx v10, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmul_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmul_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmul_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmul_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmul.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsll_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vsll_vi:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsll.vi v10, v8, 5
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsll.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vsll_vi:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vsll.vi v10, v8, 5
+; VLOPT-NEXT:    vsll.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsext.vf2:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsext.vf2 v12, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vsext.vf2:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vsext.vf2 v12, v8
+; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsext.vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsext.vf4:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsext.vf4 v12, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vsext.vf4:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vsext.vf4 v12, v8
+; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(<vscale x 4 x i32> poison, <vscale x 4 x i8> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i64> @vsext.vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsext.vf8:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vsext.vf8 v16, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v16, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vsext.vf8:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; VLOPT-NEXT:    vsext.vf8 v16, v8
+; VLOPT-NEXT:    vadd.vv v8, v16, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i32.nxv4i8(<vscale x 4 x i64> poison, <vscale x 4 x i8> %a, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %b, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i32> @vzext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vzext.vf2:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vzext.vf2 v12, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vzext.vf2:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vzext.vf2 v12, v8
+; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vzext.vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vzext.vf4:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vzext.vf4 v12, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vzext.vf4:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vzext.vf4 v12, v8
+; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(<vscale x 4 x i32> poison, <vscale x 4 x i8> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i64> @vzext.vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vzext.vf8:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vzext.vf8 v16, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v16, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vzext.vf8:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; VLOPT-NEXT:    vzext.vf8 v16, v8
+; VLOPT-NEXT:    vadd.vv v8, v16, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i32.nxv4i8(<vscale x 4 x i64> poison, <vscale x 4 x i8> %a, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %b, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+; FIXME: Add a test for vmv.v.i
+; FIXME: Add a test for vmv.v.v
+
+define <vscale x 4 x i32> @vmv.v.x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
+; NOVLOPT-LABEL: vmv.v.x:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmv.v.x v10, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmv.v.x:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmv.v.x v10, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmv.v.x.nxv4i32(<vscale x 4 x i32> poison, i32 %x, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i16> @vnsrl_wi(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnsrl_wi:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; NOVLOPT-NEXT:    vnsrl.wi v11, v8, 5
+; NOVLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v11, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vnsrl_wi:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; VLOPT-NEXT:    vnsrl.wi v11, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v11, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
+  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
+  ret <vscale x 4 x i16> %2
+}
+
+define <vscale x 4 x i64> @vwadd.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwadd.vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwadd.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwadd.vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwadd.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwaddu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwaddu.vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwaddu.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwaddu.vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwaddu.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; NOVLOPT-NEXT:    vwmacc.vx v10, a0, v8
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmacc_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; VLOPT-NEXT:    vwmacc.vx v10, a0, v8
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v10, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; NOVLOPT-NEXT:    vwmaccu.vx v10, a0, v8
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmaccu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; VLOPT-NEXT:    vwmaccu.vx v10, a0, v8
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v10, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwsll.vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsll.vi:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; NOVLOPT-NEXT:    vwsll.vi v12, v8, 1
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsll.vi:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; VLOPT-NEXT:    vwsll.vi v12, v8, 1
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen 1, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}

>From cf37397d1ef2365ec16dd2e13987f38b1a55b8fb Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Sep 2024 10:45:14 -0700
Subject: [PATCH 11/17] fixup! add more supported instrs and tests

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |  74 ++-
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 645 +++++++++++++++++--
 2 files changed, 643 insertions(+), 76 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index a948e8f22332a8..60b6e5eba6da1c 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1340,25 +1340,79 @@ static bool isSupportedInstr(const MachineInstr &MI) {
     return false;
 
   switch (RVV->BaseInstr) {
+  // 11.1. Vector Single-Width Integer Add and Subtract
   case RISCV::VADD_VI:
   case RISCV::VADD_VV:
   case RISCV::VADD_VX:
-  case RISCV::VMUL_VV:
-  case RISCV::VMUL_VX:
-  case RISCV::VSLL_VI:
-  case RISCV::VSEXT_VF2:
-  case RISCV::VSEXT_VF4:
-  case RISCV::VSEXT_VF8:
+  case RISCV::VSUB_VV:
+  case RISCV::VSUB_VX:
+  case RISCV::VRSUB_VI:
+  case RISCV::VRSUB_VX:
+  // 11.2. Vector Widening Integer Add/Subtract
+  case RISCV::VWADDU_VV:
+  case RISCV::VWADDU_VX:
+  case RISCV::VWSUBU_VV:
+  case RISCV::VWSUBU_VX:
+  case RISCV::VWADD_VV:
+  case RISCV::VWADD_VX:
+  case RISCV::VWSUB_VV:
+  case RISCV::VWSUB_VX:
+  case RISCV::VWADDU_WV:
+  case RISCV::VWADDU_WX:
+  case RISCV::VWSUBU_WV:
+  case RISCV::VWSUBU_WX:
+  case RISCV::VWADD_WV:
+  case RISCV::VWADD_WX:
+  case RISCV::VWSUB_WV:
+  case RISCV::VWSUB_WX:
+  // 11.3. Vector Integer Extension
   case RISCV::VZEXT_VF2:
+  case RISCV::VSEXT_VF2:
   case RISCV::VZEXT_VF4:
+  case RISCV::VSEXT_VF4:
   case RISCV::VZEXT_VF8:
-  case RISCV::VMV_V_I:
-  case RISCV::VMV_V_X:
+  case RISCV::VSEXT_VF8:
+  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+  // FIXME: Add support for 11.4 instructions
+  // 11.5. Vector Bitwise Logical Instructions
+  // FIXME: Add support for 11.5 instructions
+  // 11.6. Vector Single-Width Shift Instructions
+  // FIXME: Add support for 11.6 instructions
+  case RISCV::VSLL_VI:
+  // 11.7. Vector Narrowing Integer Right Shift Instructions
+  // FIXME: Add support for 11.7 instructions
   case RISCV::VNSRL_WI:
-  case RISCV::VWADD_VV:
-  case RISCV::VWADDU_VV:
+  // 11.8 Vector Integer Compare Instructions
+  // FIXME: Add support for 11.8 instructions
+  // 11.9. Vector Integer Min/Max Instructions
+  // FIXME: Add support for 11.9 instructions
+  // 11.10. Vector Single-Width Integer Multiply Instructions
+  case RISCV::VMUL_VV:
+  case RISCV::VMUL_VX:
+  case RISCV::VMULH_VV:
+  case RISCV::VMULH_VX:
+  case RISCV::VMULHU_VV:
+  case RISCV::VMULHU_VX:
+  case RISCV::VMULHSU_VV:
+  case RISCV::VMULHSU_VX:
+  // 11.11. Vector Integer Divide Instructions
+  // FIXME: Add support for 11.11 instructions
+  // 11.12. Vector Widening Integer Multiply Instructions
+  // FIXME: Add support for 11.12 instructions
+  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+  // FIXME: Add support for 11.13 instructions
+  // 11.14. Vector Widening Integer Multiply-Add Instructions
+  // FIXME: Add support for 11.14 instructions
   case RISCV::VWMACC_VX:
   case RISCV::VWMACCU_VX:
+  // 11.15. Vector Integer Merge Instructions
+  // FIXME: Add support for 11.15 instructions
+  // 11.16. Vector Integer Move Instructions
+  // FIXME: Add support for 11.16 instructions
+  case RISCV::VMV_V_I:
+  case RISCV::VMV_V_X:
+
+  // Vector Crypto
   case RISCV::VWSLL_VI:
     return true;
   }
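
As a sketch of how one of the FIXME'd sections above could be filled in later (hypothetical, not part of this patch): adding, say, RISCV::VAND_VV from section 11.5 to the switch would be paired with a test in the style of vl-opt-instrs.ll below. The llvm.riscv.vand intrinsic is assumed to take the same passthru/op/op/vl operands as vadd used throughout these tests.

define <vscale x 4 x i32> @vand_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
  ; Hypothetical test: the VL optimizer would only shrink %1's VL once
  ; VAND_VV is listed in isSupportedInstr.
  %1 = call <vscale x 4 x i32> @llvm.riscv.vand.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
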
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 4fd02bd649577d..3f3c76861e9aa2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -66,66 +66,422 @@ define <vscale x 4 x i32> @vadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmul_vv:
+define <vscale x 4 x i32> @vsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsub_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsub.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsub.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vmul_vv:
+; VLOPT-LABEL: vsub_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmul.vv v8, v8, v10
-; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    vsub.vv v8, v8, v10
+; VLOPT-NEXT:    vsub.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
-  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmul_vx:
+define <vscale x 4 x i32> @vsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsub_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmul.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsub.vx v10, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    vsub.vv v8, v10, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vmul_vx:
+; VLOPT-LABEL: vsub_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmul.vx v10, v8, a0
-; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    vsub.vx v10, v8, a0
+; VLOPT-NEXT:    vsub.vv v8, v10, v8
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
-  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vsll_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vsll_vi:
+define <vscale x 4 x i32> @vrsub_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vrsub_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vsll.vi v10, v8, 5
+; NOVLOPT-NEXT:    vrsub.vi v10, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vsll.vv v8, v10, v8
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vsll_vi:
+; VLOPT-LABEL: vrsub_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vsll.vi v10, v8, 5
-; VLOPT-NEXT:    vsll.vv v8, v10, v8
+; VLOPT-NEXT:    vrsub.vi v10, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
-  %2 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vrsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vrsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vrsub_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vrsub.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsub.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vrsub_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vrsub.vx v10, v8, a0
+; VLOPT-NEXT:    vsub.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vrsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i64> @vwaddu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwaddu.vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwaddu.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwaddu.vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwaddu.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwaddu.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwaddu.vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwaddu.vx v12, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwaddu.vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwaddu.vx v12, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsubu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsubu.vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsubu.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsubu.vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsubu.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsubu.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsubu.vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsubu.vx v12, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsubu.vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsubu.vx v12, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwadd.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwadd.vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwadd.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwadd.vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwadd.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwadd.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwadd.vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwadd.vx v12, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwadd.vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwadd.vx v12, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsub.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsub.vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsub.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsub.vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsub.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsub.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsub.vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsub.vx v12, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsub.vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsub.vx v12, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwaddu.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwaddu.wv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwaddu.wv v8, v8, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwaddu.wv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwaddu.wv v8, v8, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwaddu.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwaddu.wx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwaddu.wx v8, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwaddu.wx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwaddu.wx v8, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.xv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsubu.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsubu.wv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsubu.wv v8, v8, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsubu.wv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsubu.wv v8, v8, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsubu.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsubu.wx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsubu.wx v8, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsubu.wx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsubu.wx v8, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwadd.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwadd.wv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwadd.wv v8, v8, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwadd.wv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwadd.wv v8, v8, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwadd.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwadd.wx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwadd.wx v8, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwadd.wx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwadd.wx v8, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsub.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsub.wv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsub.wv v8, v8, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsub.wv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsub.wv v8, v8, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwsub.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vwsub.wx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vwsub.wx v8, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwsub.wx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vwsub.wx v8, v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
+  ret <vscale x 4 x i64> %2
+}
+
 define <vscale x 4 x i32> @vsext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vsext.vf2:
 ; NOVLOPT:       # %bb.0:
@@ -246,26 +602,23 @@ define <vscale x 4 x i64> @vzext.vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b
   ret <vscale x 4 x i64> %2
 }
 
-; FIXME: Add a test for vmv.v.i
-; FIXME: Add a test for vmv.v.v
-
-define <vscale x 4 x i32> @vmv.v.x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
-; NOVLOPT-LABEL: vmv.v.x:
+define <vscale x 4 x i32> @vsll_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vsll_vi:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmv.v.x v10, a0
-; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsll.vi v10, v8, 5
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vsll.vv v8, v10, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vmv.v.x:
+; VLOPT-LABEL: vsll_vi:
 ; VLOPT:       # %bb.0:
-; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmv.v.x v10, a0
-; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vsll.vi v10, v8, 5
+; VLOPT-NEXT:    vsll.vv v8, v10, v8
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vmv.v.x.nxv4i32(<vscale x 4 x i32> poison, i32 %x, iXLen -1)
-  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
@@ -289,46 +642,166 @@ define <vscale x 4 x i16> @vnsrl_wi(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
   ret <vscale x 4 x i16> %2
 }
 
-define <vscale x 4 x i64> @vwadd.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd.vv:
+
+
+define <vscale x 4 x i32> @vmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmul_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vwadd.vv v12, v8, v10
-; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwadd.vv:
+; VLOPT-LABEL: vmul_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vwadd.vv v12, v8, v10
-; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
-  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
-  ret <vscale x 4 x i64> %2
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i64> @vwaddu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu.vv:
+define <vscale x 4 x i32> @vmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmul_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmul_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmul.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmulh_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmulh_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vwaddu.vv v12, v8, v10
-; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    vmulh.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwaddu.vv:
+; VLOPT-LABEL: vmulh_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vwaddu.vv v12, v8, v10
-; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    vmulh.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
-  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
-  ret <vscale x 4 x i64> %2
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmulh.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmulh_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmulh_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmulh.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmulh_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmulh.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmulh.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmulhu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmulhu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmulhu.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmulhu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmulhu.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmulhu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmulhu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmulhu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmulhu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmulhu.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmulhsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmulhsu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmulhsu.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmulhsu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmulhsu.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhsu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmulhsu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmulhsu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmulhsu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmulhsu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmulhsu.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhsu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
 }
 
 define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
@@ -373,6 +846,46 @@ define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vmv.v.i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
+; NOVLOPT-LABEL: vmv.v.i:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmv.v.i v10, 5
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmv.v.i:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmv.v.i v10, 5
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmv.v.x.nxv4i32(<vscale x 4 x i32> poison, i32 5, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmv.v.x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
+; NOVLOPT-LABEL: vmv.v.x:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmv.v.x v10, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmv.v.x:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmv.v.x v10, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmv.v.x.nxv4i32(<vscale x 4 x i32> poison, i32 %x, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x i32> @vwsll.vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsll.vi:
 ; NOVLOPT:       # %bb.0:

>From 2e3027a4c86e0dd42cdd98da7e5122bcdb708e08 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 26 Sep 2024 07:32:44 -0700
Subject: [PATCH 12/17] fixup! improve test names

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 52 ++++++++++----------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 3f3c76861e9aa2..afeeb095b22769 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -4,7 +4,7 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
 
-; The purpose of this test is to check the behavior of specific instructions as it relates to the VL optimizer
+; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
 
 define <vscale x 4 x i32> @vadd_vi(<vscale x 4 x i32> %a, iXLen %vl) {
 ; NOVLOPT-LABEL: vadd_vi:
@@ -146,7 +146,7 @@ define <vscale x 4 x i32> @vrsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i64> @vwaddu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwaddu.vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -167,7 +167,7 @@ define <vscale x 4 x i64> @vwaddu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwaddu.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwaddu.vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -188,7 +188,7 @@ define <vscale x 4 x i64> @vwaddu.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsubu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsubu.vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -209,7 +209,7 @@ define <vscale x 4 x i64> @vwsubu.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsubu.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsubu.vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -230,7 +230,7 @@ define <vscale x 4 x i64> @vwsubu.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwadd.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwadd.vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -251,7 +251,7 @@ define <vscale x 4 x i64> @vwadd.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwadd.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwadd.vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -272,7 +272,7 @@ define <vscale x 4 x i64> @vwadd.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsub.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsub.vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -293,7 +293,7 @@ define <vscale x 4 x i64> @vwsub.vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsub.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsub.vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -314,7 +314,7 @@ define <vscale x 4 x i64> @vwsub.vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwaddu.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwaddu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwaddu.wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -335,7 +335,7 @@ define <vscale x 4 x i64> @vwaddu.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwaddu.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwaddu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwaddu.wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -356,7 +356,7 @@ define <vscale x 4 x i64> @vwaddu.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsubu.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsubu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsubu.wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -377,7 +377,7 @@ define <vscale x 4 x i64> @vwsubu.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsubu.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsubu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsubu.wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -398,7 +398,7 @@ define <vscale x 4 x i64> @vwsubu.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwadd.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwadd_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwadd.wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -419,7 +419,7 @@ define <vscale x 4 x i64> @vwadd.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwadd.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwadd_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwadd.wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -440,7 +440,7 @@ define <vscale x 4 x i64> @vwadd.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsub.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsub_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsub.wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -461,7 +461,7 @@ define <vscale x 4 x i64> @vwsub.wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i64> @vwsub.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
+define <vscale x 4 x i64> @vwsub_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsub.wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -482,7 +482,7 @@ define <vscale x 4 x i64> @vwsub.wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i32> @vsext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i32> @vsext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vsext.vf2:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -502,7 +502,7 @@ define <vscale x 4 x i32> @vsext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vsext.vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i32> @vsext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vsext.vf4:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -522,7 +522,7 @@ define <vscale x 4 x i32> @vsext.vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i64> @vsext.vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vsext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vsext.vf8:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
@@ -542,7 +542,7 @@ define <vscale x 4 x i64> @vsext.vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i32> @vzext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i32> @vzext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vzext.vf2:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -562,7 +562,7 @@ define <vscale x 4 x i32> @vzext.vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vzext.vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i32> @vzext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vzext.vf4:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -582,7 +582,7 @@ define <vscale x 4 x i32> @vzext.vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i64> @vzext.vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
+define <vscale x 4 x i64> @vzext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vzext.vf8:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
@@ -846,7 +846,7 @@ define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vmv.v.i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
+define <vscale x 4 x i32> @vmv_v_i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 ; NOVLOPT-LABEL: vmv.v.i:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -866,7 +866,7 @@ define <vscale x 4 x i32> @vmv.v.i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vmv.v.x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
+define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 ; NOVLOPT-LABEL: vmv.v.x:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
@@ -886,7 +886,7 @@ define <vscale x 4 x i32> @vmv.v.x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vwsll.vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+define <vscale x 4 x i32> @vwsll_vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwsll.vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma

>From 8dc125f683563d03e2a832a3525446e3431ba7a2 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 08:20:36 -0700
Subject: [PATCH 13/17] fixup! pare down the number of instructions supported
 in getOperandInfo

---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |   7 +-
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    | 589 +-----------------
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |   3 +-
 .../CodeGen/RISCV/rvv/narrow-shift-extend.ll  |  54 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll   |  54 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll        |   2 +
 llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll       |   2 +
 llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll        | 144 +++--
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll     |  42 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll     |  36 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll    |  45 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll    |  45 +-
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll  | 100 +--
 llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll |  50 +-
 llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll        |   2 +
 llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll       |   2 +
 llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll        |   2 +
 llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll       |   2 +
 llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll        |   7 +-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 408 ++++++------
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     | 434 +++++++------
 llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll        |   2 +
 llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll       |   2 +
 .../RISCV/rvv/vsetvli-insert-crossbb.ll       |   3 +
 llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll        |   3 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll        |   2 +
 llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll        |   2 +
 llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll       |   3 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll      |   3 +-
 llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll       |  90 ++-
 31 files changed, 927 insertions(+), 1217 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 45aa113de30958..d8e068e842779f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -103,9 +103,10 @@ static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
     cl::desc("Insert vsetvls after vector register allocation"),
     cl::init(true));
 
-static cl::opt<bool> EnableVLOptimizer("riscv-enable-vl-optimizer",
-                                       cl::desc("Enable the VL Optimizer pass"),
-                                       cl::init(true), cl::Hidden);
+static cl::opt<bool>
+    EnableVLOptimizer("riscv-enable-vl-optimizer",
+                      cl::desc("Enable the RISC-V VL Optimizer pass"),
+                      cl::init(false), cl::Hidden);
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 60b6e5eba6da1c..48308b2f822637 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -104,11 +104,7 @@ struct OperandInfo {
   OperandInfo(unsigned Log2EEW)
       : S(State::Known), EMUL(std::nullopt), Log2EEW(Log2EEW), IsScalar(true) {}
 
-  OperandInfo(State S) : S(S) {
-    assert(S != State::Known &&
-           "This constructor may only be used to construct "
-           "an Unknown OperandInfo");
-  }
+  OperandInfo() : S(State::Unknown) {}
 
   bool isUnknown() const { return S == State::Unknown; }
   bool isKnown() const { return S == State::Known; }
@@ -172,28 +168,6 @@ static RISCVII::VLMUL twoTimesVLMUL(RISCVII::VLMUL VLMul) {
   }
 }
 
-/// Return the RISCVII::VLMUL that is VLMul / 2.
-/// Precondition: VLMul is not LMUL_RESERVED or LMUL_MF8.
-static RISCVII::VLMUL halfVLMUL(RISCVII::VLMUL VLMul) {
-  switch (VLMul) {
-  case RISCVII::VLMUL::LMUL_F4:
-    return RISCVII::VLMUL::LMUL_F8;
-  case RISCVII::VLMUL::LMUL_F2:
-    return RISCVII::VLMUL::LMUL_F4;
-  case RISCVII::VLMUL::LMUL_1:
-    return RISCVII::VLMUL::LMUL_F2;
-  case RISCVII::VLMUL::LMUL_2:
-    return RISCVII::VLMUL::LMUL_1;
-  case RISCVII::VLMUL::LMUL_4:
-    return RISCVII::VLMUL::LMUL_2;
-  case RISCVII::VLMUL::LMUL_8:
-    return RISCVII::VLMUL::LMUL_4;
-  case RISCVII::VLMUL::LMUL_F8:
-  default:
-    llvm_unreachable("Could not divide VLMul by 2");
-  }
-}
-
 /// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
 /// SEW are from the TSFlags of MI.
 static std::pair<unsigned, bool>
@@ -227,33 +201,6 @@ static bool isOpN(const MachineOperand &MO, unsigned OpN) {
   return MO.getOperandNo() == OpN;
 }
 
-/// An index segment load or store operand has the form v.*seg<nf>ei<eeew>.v.
-/// Data has EEW=SEW, EMUL=LMUL. Index has EEW=<eew>, EMUL=(EEW/SEW)*LMUL. LMUL
-/// and SEW comes from TSFlags of MI.
-static OperandInfo getIndexSegmentLoadStoreOperandInfo(unsigned Log2EEW,
-                                                       const MachineInstr &MI,
-                                                       const MachineOperand &MO,
-                                                       bool IsLoad) {
-  // Operand 0 is data register
-  // Data vector register group has EEW=SEW, EMUL=LMUL.
-  if (MO.getOperandNo() == 0) {
-    RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
-    unsigned MILog2SEW =
-        MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
-    return OperandInfo(MIVLMul, MILog2SEW);
-  }
-
-  // Operand 2 is index vector register
-  // v.*seg<nf>ei<eeew>.v
-  // Index vector register group has EEW=<eew>, EMUL=(EEW/SEW)*LMUL.
-  if (isOpN(MO, 2))
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI),
-                       Log2EEW);
-
-  llvm_unreachable("Could not get OperandInfo for non-vector register of an "
-                   "indexed segment load or store instruction");
-}
-
 /// Dest has EEW=SEW and EMUL=LMUL. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
 /// Source has EMUL=(EEW/SEW)*LMUL. LMUL and SEW comes from TSFlags of MI.
 static OperandInfo getIntegerExtensionOperandInfo(unsigned Factor,
@@ -307,12 +254,10 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   // since they must preserve the entire register content.
   if (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs() &&
       (MO.getReg() != RISCV::NoRegister))
-    return OperandInfo(OperandInfo::State::Unknown);
+    return {};
 
   bool IsMODef = MO.getOperandNo() == 0;
   bool IsOp1 = isOpN(MO, 1);
-  bool IsOp2 = isOpN(MO, 2);
-  bool IsOp3 = isOpN(MO, 3);
 
   // All mask operands have EEW=1, EMUL=(EEW/SEW)*LMUL
   if (isMaskOperand(MI, MO, MRI))
@@ -330,69 +275,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     llvm_unreachable("Configuration setting instructions do not read or write "
                      "vector registers");
 
-  // 7. Vector Loads and Stores
-  // 7.4. Vector Unit-Stride Instructions
-  // 7.5. Vector Strided Instructions
-  // 7.7. Unit-stride Fault-Only-First Loads
-  /// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL
-  case RISCV::VLE8_V:
-  case RISCV::VSE8_V:
-  case RISCV::VLM_V:
-  case RISCV::VSM_V:
-  case RISCV::VLSE8_V:
-  case RISCV::VSSE8_V:
-  case RISCV::VLE8FF_V:
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
-  case RISCV::VLE16_V:
-  case RISCV::VSE16_V:
-  case RISCV::VLSE16_V:
-  case RISCV::VSSE16_V:
-  case RISCV::VLE16FF_V:
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
-  case RISCV::VLE32_V:
-  case RISCV::VSE32_V:
-  case RISCV::VLSE32_V:
-  case RISCV::VSSE32_V:
-  case RISCV::VLE32FF_V:
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
-  case RISCV::VLE64_V:
-  case RISCV::VSE64_V:
-  case RISCV::VLSE64_V:
-  case RISCV::VSSE64_V:
-  case RISCV::VLE64FF_V:
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
-
-  // 7.6. Vector Indexed Instructions
-  // Data EEW=SEW, EMUL=LMUL. Index EEW=<eew> and EMUL=(EEW/SEW)*LMUL
-  case RISCV::VLUXEI8_V:
-  case RISCV::VLOXEI8_V:
-  case RISCV::VSUXEI8_V:
-  case RISCV::VSOXEI8_V:
-    if (MO.getOperandNo() == 0)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
-  case RISCV::VLUXEI16_V:
-  case RISCV::VLOXEI16_V:
-  case RISCV::VSUXEI16_V:
-  case RISCV::VSOXEI16_V:
-    if (MO.getOperandNo() == 0)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
-  case RISCV::VLUXEI32_V:
-  case RISCV::VLOXEI32_V:
-  case RISCV::VSUXEI32_V:
-  case RISCV::VSOXEI32_V:
-    if (MO.getOperandNo() == 0)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
-  case RISCV::VLUXEI64_V:
-  case RISCV::VLOXEI64_V:
-  case RISCV::VSUXEI64_V:
-  case RISCV::VSOXEI64_V:
-    if (MO.getOperandNo() == 0)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
-
   // 7.8. Vector Load/Store Segment Instructions
   // 7.8.1. Vector Unit-Stride Segment Loads and Stores
   // v.*seg<nf>e<eew>.*
@@ -548,128 +430,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSSSEG8E64_V:
     return OperandInfo(MIVLMul, 6);
 
-  // 7.8.3. Vector Indexed Segment Loads and Stores
-  case RISCV::VLUXSEG2EI8_V:
-  case RISCV::VLUXSEG3EI8_V:
-  case RISCV::VLUXSEG4EI8_V:
-  case RISCV::VLUXSEG5EI8_V:
-  case RISCV::VLUXSEG6EI8_V:
-  case RISCV::VLUXSEG7EI8_V:
-  case RISCV::VLUXSEG8EI8_V:
-  case RISCV::VLOXSEG2EI8_V:
-  case RISCV::VLOXSEG3EI8_V:
-  case RISCV::VLOXSEG4EI8_V:
-  case RISCV::VLOXSEG5EI8_V:
-  case RISCV::VLOXSEG6EI8_V:
-  case RISCV::VLOXSEG7EI8_V:
-  case RISCV::VLOXSEG8EI8_V:
-    return getIndexSegmentLoadStoreOperandInfo(3, MI, MO, /* IsLoad */ true);
-  case RISCV::VSUXSEG2EI8_V:
-  case RISCV::VSUXSEG3EI8_V:
-  case RISCV::VSUXSEG4EI8_V:
-  case RISCV::VSUXSEG5EI8_V:
-  case RISCV::VSUXSEG6EI8_V:
-  case RISCV::VSUXSEG7EI8_V:
-  case RISCV::VSUXSEG8EI8_V:
-  case RISCV::VSOXSEG2EI8_V:
-  case RISCV::VSOXSEG3EI8_V:
-  case RISCV::VSOXSEG4EI8_V:
-  case RISCV::VSOXSEG5EI8_V:
-  case RISCV::VSOXSEG6EI8_V:
-  case RISCV::VSOXSEG7EI8_V:
-  case RISCV::VSOXSEG8EI8_V:
-    return getIndexSegmentLoadStoreOperandInfo(3, MI, MO, /* IsLoad */ false);
-  case RISCV::VLUXSEG2EI16_V:
-  case RISCV::VLUXSEG3EI16_V:
-  case RISCV::VLUXSEG4EI16_V:
-  case RISCV::VLUXSEG5EI16_V:
-  case RISCV::VLUXSEG6EI16_V:
-  case RISCV::VLUXSEG7EI16_V:
-  case RISCV::VLUXSEG8EI16_V:
-  case RISCV::VLOXSEG2EI16_V:
-  case RISCV::VLOXSEG3EI16_V:
-  case RISCV::VLOXSEG4EI16_V:
-  case RISCV::VLOXSEG5EI16_V:
-  case RISCV::VLOXSEG6EI16_V:
-  case RISCV::VLOXSEG7EI16_V:
-  case RISCV::VLOXSEG8EI16_V:
-    return getIndexSegmentLoadStoreOperandInfo(4, MI, MO, /* IsLoad */ true);
-  case RISCV::VSUXSEG2EI16_V:
-  case RISCV::VSUXSEG3EI16_V:
-  case RISCV::VSUXSEG4EI16_V:
-  case RISCV::VSUXSEG5EI16_V:
-  case RISCV::VSUXSEG6EI16_V:
-  case RISCV::VSUXSEG7EI16_V:
-  case RISCV::VSUXSEG8EI16_V:
-  case RISCV::VSOXSEG2EI16_V:
-  case RISCV::VSOXSEG3EI16_V:
-  case RISCV::VSOXSEG4EI16_V:
-  case RISCV::VSOXSEG5EI16_V:
-  case RISCV::VSOXSEG6EI16_V:
-  case RISCV::VSOXSEG7EI16_V:
-  case RISCV::VSOXSEG8EI16_V:
-    return getIndexSegmentLoadStoreOperandInfo(4, MI, MO, /* IsLoad */ false);
-  case RISCV::VLUXSEG2EI32_V:
-  case RISCV::VLUXSEG3EI32_V:
-  case RISCV::VLUXSEG4EI32_V:
-  case RISCV::VLUXSEG5EI32_V:
-  case RISCV::VLUXSEG6EI32_V:
-  case RISCV::VLUXSEG7EI32_V:
-  case RISCV::VLUXSEG8EI32_V:
-  case RISCV::VLOXSEG2EI32_V:
-  case RISCV::VLOXSEG3EI32_V:
-  case RISCV::VLOXSEG4EI32_V:
-  case RISCV::VLOXSEG5EI32_V:
-  case RISCV::VLOXSEG6EI32_V:
-  case RISCV::VLOXSEG7EI32_V:
-  case RISCV::VLOXSEG8EI32_V:
-    return getIndexSegmentLoadStoreOperandInfo(5, MI, MO, /* IsLoad */ true);
-  case RISCV::VSUXSEG2EI32_V:
-  case RISCV::VSUXSEG3EI32_V:
-  case RISCV::VSUXSEG4EI32_V:
-  case RISCV::VSUXSEG5EI32_V:
-  case RISCV::VSUXSEG6EI32_V:
-  case RISCV::VSUXSEG7EI32_V:
-  case RISCV::VSUXSEG8EI32_V:
-  case RISCV::VSOXSEG2EI32_V:
-  case RISCV::VSOXSEG3EI32_V:
-  case RISCV::VSOXSEG4EI32_V:
-  case RISCV::VSOXSEG5EI32_V:
-  case RISCV::VSOXSEG6EI32_V:
-  case RISCV::VSOXSEG7EI32_V:
-  case RISCV::VSOXSEG8EI32_V:
-    return getIndexSegmentLoadStoreOperandInfo(5, MI, MO, /* IsLoad */ false);
-  case RISCV::VLUXSEG2EI64_V:
-  case RISCV::VLUXSEG3EI64_V:
-  case RISCV::VLUXSEG4EI64_V:
-  case RISCV::VLUXSEG5EI64_V:
-  case RISCV::VLUXSEG6EI64_V:
-  case RISCV::VLUXSEG7EI64_V:
-  case RISCV::VLUXSEG8EI64_V:
-  case RISCV::VLOXSEG2EI64_V:
-  case RISCV::VLOXSEG3EI64_V:
-  case RISCV::VLOXSEG4EI64_V:
-  case RISCV::VLOXSEG5EI64_V:
-  case RISCV::VLOXSEG6EI64_V:
-  case RISCV::VLOXSEG7EI64_V:
-  case RISCV::VLOXSEG8EI64_V:
-    return getIndexSegmentLoadStoreOperandInfo(6, MI, MO, /* IsLoad */ true);
-  case RISCV::VSUXSEG2EI64_V:
-  case RISCV::VSUXSEG3EI64_V:
-  case RISCV::VSUXSEG4EI64_V:
-  case RISCV::VSUXSEG5EI64_V:
-  case RISCV::VSUXSEG6EI64_V:
-  case RISCV::VSUXSEG7EI64_V:
-  case RISCV::VSUXSEG8EI64_V:
-  case RISCV::VSOXSEG2EI64_V:
-  case RISCV::VSOXSEG3EI64_V:
-  case RISCV::VSOXSEG4EI64_V:
-  case RISCV::VSOXSEG5EI64_V:
-  case RISCV::VSOXSEG6EI64_V:
-  case RISCV::VSOXSEG7EI64_V:
-  case RISCV::VSOXSEG8EI64_V:
-    return getIndexSegmentLoadStoreOperandInfo(6, MI, MO, /* IsLoad */ false);
-
   // 7.9. Vector Load/Store Whole Register Instructions
   // EMUL=nr. EEW=eew. Since in-register byte layouts are idential to in-memory
   // byte layouts, the same data is writen to destination register regardless
@@ -757,38 +517,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSEXT_VF8:
     return getIntegerExtensionOperandInfo(8, MI, MO);
 
-  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
-  // EEW=SEW and EMUL=LMUL. Mask Operand EEW=1 and EMUL=(EEW/SEW)*LMUL
-  case RISCV::VADC_VIM:
-  case RISCV::VADC_VVM:
-  case RISCV::VADC_VXM:
-  case RISCV::VSBC_VVM:
-  case RISCV::VSBC_VXM:
-    return MO.getOperandNo() == 3
-               ? OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI),
-                             0)
-               : OperandInfo(MIVLMul, MILog2SEW);
-  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. Mask
-  // operand EEW=1 and EMUL=(EEW/SEW)*LMUL
-  case RISCV::VMADC_VIM:
-  case RISCV::VMADC_VVM:
-  case RISCV::VMADC_VXM:
-  case RISCV::VMSBC_VVM:
-  case RISCV::VMSBC_VXM:
-    return IsMODef || MO.getOperandNo() == 3
-               ? OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI),
-                             0)
-               : OperandInfo(MIVLMul, MILog2SEW);
-  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
-  case RISCV::VMADC_VV:
-  case RISCV::VMADC_VI:
-  case RISCV::VMADC_VX:
-  case RISCV::VMSBC_VV:
-  case RISCV::VMSBC_VX:
-    return IsMODef ? OperandInfo(
-                         RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0)
-                   : OperandInfo(MIVLMul, MILog2SEW);
-
   // 11.5. Vector Bitwise Logical Instructions
   // 11.6. Vector Single-Width Shift Instructions
   // EEW=SEW. EMUL=LMUL.
@@ -827,31 +555,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
         TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
-  // 11.8. Vector Integer Compare Instructions
-  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
-  case RISCV::VMSEQ_VI:
-  case RISCV::VMSEQ_VV:
-  case RISCV::VMSEQ_VX:
-  case RISCV::VMSNE_VI:
-  case RISCV::VMSNE_VV:
-  case RISCV::VMSNE_VX:
-  case RISCV::VMSLTU_VV:
-  case RISCV::VMSLTU_VX:
-  case RISCV::VMSLT_VV:
-  case RISCV::VMSLT_VX:
-  case RISCV::VMSLEU_VV:
-  case RISCV::VMSLEU_VI:
-  case RISCV::VMSLEU_VX:
-  case RISCV::VMSLE_VV:
-  case RISCV::VMSLE_VI:
-  case RISCV::VMSLE_VX:
-  case RISCV::VMSGTU_VI:
-  case RISCV::VMSGTU_VX:
-  case RISCV::VMSGT_VI:
-  case RISCV::VMSGT_VX:
-    if (IsMODef)
-      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
-    return OperandInfo(MIVLMul, MILog2SEW);
 
   // 11.9. Vector Integer Min/Max Instructions
   // EEW=SEW. EMUL=LMUL.
@@ -1004,278 +707,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
         TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
     return OperandInfo(EMUL, Log2EEW);
   }
-  // 13. Vector Floating-Point Instructions
-  // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-  // EEW=SEW. EMUL=LMUL.
-  case RISCV::VFADD_VF:
-  case RISCV::VFADD_VV:
-  case RISCV::VFSUB_VF:
-  case RISCV::VFSUB_VV:
-  case RISCV::VFRSUB_VF:
-    return OperandInfo(MIVLMul, MILog2SEW);
 
-  // 13.3. Vector Widening Floating-Point Add/Subtract Instructions
-  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
-  case RISCV::VFWADD_VV:
-  case RISCV::VFWADD_VF:
-  case RISCV::VFWSUB_VV:
-  case RISCV::VFWSUB_VF: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // Dest and Op1 EEW=2*SEW and EMUL=2*LMUL. Op2 EEW=SEW and EMUL=LMUL.
-  case RISCV::VFWADD_WF:
-  case RISCV::VFWADD_WV:
-  case RISCV::VFWSUB_WF:
-  case RISCV::VFWSUB_WV: {
-    bool TwoTimes = IsMODef || IsOp1;
-    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-  // EEW=SEW. EMUL=LMUL.
-  case RISCV::VFMUL_VF:
-  case RISCV::VFMUL_VV:
-  case RISCV::VFDIV_VF:
-  case RISCV::VFDIV_VV:
-  case RISCV::VFRDIV_VF:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 13.5. Vector Widening Floating-Point Multiply
-  case RISCV::VFWMUL_VF:
-  case RISCV::VFWMUL_VV:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
-  // EEW=SEW. EMUL=LMUL.
-  // TODO: FMA instructions reads 3 registers but MC layer only reads 2
-  // registers since its missing that the output operand should be part of the
-  // input operand list.
-  case RISCV::VFMACC_VF:
-  case RISCV::VFMACC_VV:
-  case RISCV::VFNMACC_VF:
-  case RISCV::VFNMACC_VV:
-  case RISCV::VFMSAC_VF:
-  case RISCV::VFMSAC_VV:
-  case RISCV::VFNMSAC_VF:
-  case RISCV::VFNMSAC_VV:
-  case RISCV::VFMADD_VF:
-  case RISCV::VFMADD_VV:
-  case RISCV::VFNMADD_VF:
-  case RISCV::VFNMADD_VV:
-  case RISCV::VFMSUB_VF:
-  case RISCV::VFMSUB_VV:
-  case RISCV::VFNMSUB_VF:
-  case RISCV::VFNMSUB_VV:
-    return OperandInfo(OperandInfo::State::Unknown);
-
-  // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW EMUL=LMUL.
-  case RISCV::VFWMACC_VF:
-  case RISCV::VFWMACC_VV:
-  case RISCV::VFWNMACC_VF:
-  case RISCV::VFWNMACC_VV:
-  case RISCV::VFWMSAC_VF:
-  case RISCV::VFWMSAC_VV:
-  case RISCV::VFWNMSAC_VF:
-  case RISCV::VFWNMSAC_VV: {
-    // Operand 0 is destination as a def and Operand 1 is destination as a use
-    // due to SSA.
-    bool TwoTimes = IsMODef || IsOp1;
-    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // 13.8. Vector Floating-Point Square-Root Instruction
-  // 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
-  // 13.10. Vector Floating-Point Reciprocal Estimate Instruction
-  // 13.11. Vector Floating-Point MIN/MAX Instructions
-  // 13.12. Vector Floating-Point Sign-Injection Instructions
-  // 13.14. Vector Floating-Point Classify Instruction
-  // 13.16. Vector Floating-Point Move Instruction
-  // 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
-  // EEW=SEW. EMUL=LMUL.
-  case RISCV::VFSQRT_V:
-  case RISCV::VFRSQRT7_V:
-  case RISCV::VFREC7_V:
-  case RISCV::VFMIN_VF:
-  case RISCV::VFMIN_VV:
-  case RISCV::VFMAX_VF:
-  case RISCV::VFMAX_VV:
-  case RISCV::VFSGNJ_VF:
-  case RISCV::VFSGNJ_VV:
-  case RISCV::VFSGNJN_VV:
-  case RISCV::VFSGNJN_VF:
-  case RISCV::VFSGNJX_VF:
-  case RISCV::VFSGNJX_VV:
-  case RISCV::VFCLASS_V:
-  case RISCV::VFMV_V_F:
-  case RISCV::VFCVT_XU_F_V:
-  case RISCV::VFCVT_X_F_V:
-  case RISCV::VFCVT_RTZ_XU_F_V:
-  case RISCV::VFCVT_RTZ_X_F_V:
-  case RISCV::VFCVT_F_XU_V:
-  case RISCV::VFCVT_F_X_V:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 13.13. Vector Floating-Point Compare Instructions
-  // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW EMUL=LMUL.
-  case RISCV::VMFEQ_VF:
-  case RISCV::VMFEQ_VV:
-  case RISCV::VMFNE_VF:
-  case RISCV::VMFNE_VV:
-  case RISCV::VMFLT_VF:
-  case RISCV::VMFLT_VV:
-  case RISCV::VMFLE_VF:
-  case RISCV::VMFLE_VV:
-  case RISCV::VMFGT_VF:
-  case RISCV::VMFGE_VF:
-    if (IsMODef)
-      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 13.15. Vector Floating-Point Merge Instruction
-  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
-  // (EEW/SEW)*LMUL.
-  case RISCV::VFMERGE_VFM:
-    if (IsOp3)
-      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-  // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
-  case RISCV::VFWCVT_XU_F_V:
-  case RISCV::VFWCVT_X_F_V:
-  case RISCV::VFWCVT_RTZ_XU_F_V:
-  case RISCV::VFWCVT_RTZ_X_F_V:
-  case RISCV::VFWCVT_F_XU_V:
-  case RISCV::VFWCVT_F_X_V:
-  case RISCV::VFWCVT_F_F_V: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
-  // EMUL=LMUL. Dest EEW=SEW/2. Source EEW=SEW EMUL=LMUL.
-  case RISCV::VFNCVT_XU_F_W:
-  case RISCV::VFNCVT_X_F_W:
-  case RISCV::VFNCVT_RTZ_XU_F_W:
-  case RISCV::VFNCVT_RTZ_X_F_W:
-  case RISCV::VFNCVT_F_XU_W:
-  case RISCV::VFNCVT_F_X_W:
-  case RISCV::VFNCVT_F_F_W:
-  case RISCV::VFNCVT_ROD_F_F_W: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW - 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL = IsMODef ? RISCVVType::halfVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // 14. Vector Reduction Operations
-  // 14.1. Vector Single-Width Integer Reduction Instructions
-  // The destination and first source operand are vector registers, but only
-  // element 0 has any meaning. The vector registers are being used as a scalar
-  // register. They have a width (SEW), but the concept of LMUL or EMUL does not
-  // apply here. The next vector operand is used for tail value, which does not
-  // obey VL, we must return Unknown. The last vector operand is the the
-  // elements to reduce over, which does obey VL and has EEW=SEW and EMUL=LMUL.
-  case RISCV::VREDAND_VS:
-  case RISCV::VREDMAX_VS:
-  case RISCV::VREDMAXU_VS:
-  case RISCV::VREDMIN_VS:
-  case RISCV::VREDMINU_VS:
-  case RISCV::VREDOR_VS:
-  case RISCV::VREDSUM_VS:
-  case RISCV::VREDXOR_VS: {
-    if (IsMODef || IsOp1)
-      return OperandInfo(MILog2SEW);
-    if (IsOp2)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(OperandInfo::State::Unknown);
-  }
-  // 14.2. Vector Widening Integer Reduction Instructions
-  // Again, the destination and first source operand are vector registers, but
-  // only element 0 has any meaning. The vector registers are being used as a
-  // scalar register. They have a width (2 * SEW), but the concept of LMUL or
-  // EMUL does not apply here. The next vector operand is used for tail value,
-  // which does not obey VL, we must return Unknown. The last vector operand is
-  // the the elements to reduce over, which does obey VL and has EEW=SEW and
-  // EMUL=LMUL.
-  case RISCV::VWREDSUM_VS:
-  case RISCV::VWREDSUMU_VS: {
-    if (IsMODef || IsOp1)
-      return OperandInfo(MILog2SEW + 1);
-    if (IsOp2)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(OperandInfo::State::Unknown);
-  }
-  // 14.3. Vector Single-Width Floating-Point Reduction Instructions
-  // Behaves similar to 14.1
-  case RISCV::VFREDMAX_VS:
-  case RISCV::VFREDMIN_VS:
-  case RISCV::VFREDOSUM_VS:
-  case RISCV::VFREDUSUM_VS: {
-    if (IsMODef || IsOp1)
-      return OperandInfo(MILog2SEW);
-    if (IsOp2)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(OperandInfo::State::Unknown);
-  }
-  // 14.4. Vector Widening Floating-Point Reduction Instructions
-  // Behaves similar to 14.2
-  case RISCV::VFWREDOSUM_VS:
-  case RISCV::VFWREDUSUM_VS: {
-    if (IsMODef || IsOp1)
-      return OperandInfo(MILog2SEW + 1);
-    if (IsOp2)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(OperandInfo::State::Unknown);
-  }
-
-  // 15. Vector Mask Instructions
-  // 15.2. Vector count population in mask vcpop.m
-  // 15.3. vfirst find-first-set mask bit
-  // 15.4. vmsbf.m set-before-first mask bit
-  // 15.6. vmsof.m set-only-first mask bit
-  // EEW=1 and EMUL= (EEW/SEW)*LMUL
-  case RISCV::VMAND_MM:
-  case RISCV::VMNAND_MM:
-  case RISCV::VMANDN_MM:
-  case RISCV::VMXOR_MM:
-  case RISCV::VMOR_MM:
-  case RISCV::VMNOR_MM:
-  case RISCV::VMORN_MM:
-  case RISCV::VMXNOR_MM:
-  case RISCV::VCPOP_M:
-  case RISCV::VFIRST_M:
-  case RISCV::VMSBF_M:
-  case RISCV::VMSIF_M:
-  case RISCV::VMSOF_M: {
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
-  }
-  // 15.8. Vector Iota Instruction
-  // Dest and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=1 and EMUL(EEW/SEW)*LMUL.
-  case RISCV::VIOTA_M: {
-    bool IsDefOrOp1 = IsMODef || IsOp1;
-    unsigned Log2EEW = IsDefOrOp1 ? 0 : MILog2SEW;
-    if (IsDefOrOp1)
-      return OperandInfo(MIVLMul, Log2EEW);
-    return OperandInfo(
-        RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(MILog2SEW, MI), Log2EEW);
-  }
-  // 15.9. Vector Element Index Instruction
-  // Dest EEW=SEW EMUL=LMUL. Mask Operand EEW=1 and EMUL(EEW/SEW)*LMUL.
-  case RISCV::VID_V: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW : 0;
-    if (IsMODef)
-      return OperandInfo(MIVLMul, Log2EEW);
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI),
-                       Log2EEW);
-  }
   // 16. Vector Permutation Instructions
   // 16.1. Integer Scalar Move Instructions
   // 16.2. Floating-Point Scalar Move Instructions
@@ -1304,28 +736,13 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VRGATHER_VV:
   case RISCV::VRGATHER_VX:
     return OperandInfo(MIVLMul, MILog2SEW);
-  // Destination EMUL=LMUL and EEW=SEW. Op2 EEW=SEW and EMUL=LMUL. Op1 EEW=16
-  // and EMUL=(16/SEW)*LMUL.
-  case RISCV::VRGATHEREI16_VV: {
-    if (IsMODef || IsOp2)
-      return OperandInfo(MIVLMul, MILog2SEW);
-    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
-  }
   // 16.5. Vector Compress Instruction
   // EMUL=LMUL. EEW=SEW.
   case RISCV::VCOMPRESS_VM:
     return OperandInfo(MIVLMul, MILog2SEW);
 
-  // 16.6. Whole Vector Register Move
-  case RISCV::VMV1R_V:
-  case RISCV::VMV2R_V:
-  case RISCV::VMV4R_V:
-  case RISCV::VMV8R_V:
-    llvm_unreachable("These instructions don't have pseudo versions so they "
-                     "don't have an SEW operand.");
-
   default:
-    return OperandInfo(OperandInfo::State::Unknown);
+    return {};
   }
 }
 
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 9c3c6e6e42346e..c29f15a15c1503 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -119,8 +119,6 @@
 ; RV64-NEXT:        RISC-V Optimize W Instructions
 ; CHECK-NEXT:       RISC-V Pre-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V Merge Base Offset
-; CHECK-NEXT:       MachineDominator Tree Construction
-; CHECK-NEXT:       RISC-V VL Optimizer
 ; CHECK-NEXT:       RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT:       RISC-V Insert Write VXRM Pass
 ; CHECK-NEXT:       RISC-V Landing Pad Setup
@@ -131,6 +129,7 @@
 ; CHECK-NEXT:       Live Variable Analysis
 ; CHECK-NEXT:       Eliminate PHI nodes for register allocation
 ; CHECK-NEXT:       Two-Address instruction pass
+; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Slot index numbering
 ; CHECK-NEXT:       Live Interval Analysis
 ; CHECK-NEXT:       Register Coalescer
diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
index 63212e7cd5e004..e47517abacb4d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
@@ -10,10 +10,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vloxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -30,10 +30,10 @@ entry:
 define <vscale x 4 x i32> @test_vloxei2(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 14
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -50,10 +50,10 @@ entry:
 define <vscale x 4 x i32> @test_vloxei3(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 26
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -74,8 +74,9 @@ define <vscale x 4 x i32> @test_vloxei4(ptr %ptr, <vscale x 4 x i8> %offset, <vs
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8, v0.t
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -99,10 +100,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
 define <vscale x 4 x i32> @test_vloxei5(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
 ; CHECK-NEXT:    vsll.vi v10, v9, 12
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei16.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
@@ -122,10 +123,10 @@ define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -145,9 +146,8 @@ define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vsll.vi v12, v8, 2
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -172,10 +172,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vloxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vloxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
@@ -199,10 +199,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vluxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vluxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vluxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -227,10 +227,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
 define <vscale x 4 x i32> @test_vluxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vluxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
@@ -254,10 +254,10 @@ declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
 define void @test_vsoxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vsoxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsoxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -281,10 +281,10 @@ declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
 define void @test_vsoxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vsoxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
@@ -308,10 +308,10 @@ declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
 define void @test_vsuxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vsuxei:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsuxei64.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
@@ -335,10 +335,10 @@ declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
 define void @test_vsuxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
 ; CHECK-LABEL: test_vsuxei_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v10
 ; CHECK-NEXT:    vsll.vi v12, v12, 4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index 073a0aecb8f732..eb8c58d2d37790 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -184,8 +184,9 @@ define <vscale x 1 x i1> @icmp_uge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1
 define <vscale x 1 x i1> @icmp_uge_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
@@ -347,8 +348,9 @@ define <vscale x 1 x i1> @icmp_sge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1
 define <vscale x 1 x i1> @icmp_sge_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
@@ -468,8 +470,9 @@ define <vscale x 1 x i1> @icmp_sle_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vsca
 define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
@@ -761,8 +764,9 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
 define <vscale x 8 x i1> @icmp_uge_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
@@ -924,8 +928,9 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
 define <vscale x 8 x i1> @icmp_sge_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
@@ -1045,8 +1050,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vsca
 define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
@@ -1369,8 +1375,9 @@ define <vscale x 1 x i1> @icmp_uge_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x
 define <vscale x 1 x i1> @icmp_uge_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
@@ -1532,8 +1539,9 @@ define <vscale x 1 x i1> @icmp_sge_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x
 define <vscale x 1 x i1> @icmp_sge_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
@@ -1653,8 +1661,9 @@ define <vscale x 1 x i1> @icmp_sle_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <v
 define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
@@ -1876,8 +1885,9 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; CHECK-NEXT:    vmsleu.vv v12, v16, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
@@ -2054,8 +2064,9 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
@@ -2186,8 +2197,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
@@ -2621,8 +2633,9 @@ define <vscale x 1 x i1> @icmp_uge_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_uge_vx_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v9, a0
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
 ; RV64-NEXT:    vmsleu.vv v0, v9, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -2868,8 +2881,9 @@ define <vscale x 1 x i1> @icmp_sge_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_sge_vx_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v9, a0
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
 ; RV64-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -3059,8 +3073,9 @@ define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
 ;
 ; RV64-LABEL: icmp_sle_vx_swap_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v9, a0
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
 ; RV64-NEXT:    vmsle.vv v0, v9, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -3387,8 +3402,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_uge_vx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vmv.v.x v24, a0
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vmsleu.vv v16, v24, v8, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
@@ -3655,8 +3671,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ;
 ; RV64-LABEL: icmp_sge_vx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vmv.v.x v24, a0
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
@@ -3862,8 +3879,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ;
 ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vmv.v.x v24, a0
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
index d276631c5883e8..a4b7ca7f39768f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vdiv_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vdiv.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
index 7794e031d5c677..67c3f9dbf2869a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vdivu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vdivu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 1531b9a221f98a..baecb7bb7d2483 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -4349,9 +4349,10 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat(<vscale x 1 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4381,9 +4382,10 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat_commute(<vscale x 1 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4413,9 +4415,10 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat_unmasked(<vscale x 1 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4445,9 +4448,10 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
@@ -4714,9 +4718,10 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat(<vscale x 1 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -4744,9 +4749,10 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat_commute(<vscale x 1 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -4774,9 +4780,10 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat_unmasked(<vscale x 1 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -4804,9 +4811,10 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5258,9 +5266,10 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat(<vscale x 2 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5290,9 +5299,10 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat_commute(<vscale x 2 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5322,9 +5332,10 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat_unmasked(<vscale x 2 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5354,9 +5365,10 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
@@ -5623,9 +5635,10 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat(<vscale x 2 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5653,9 +5666,10 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat_commute(<vscale x 2 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5683,9 +5697,10 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat_unmasked(<vscale x 2 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -5713,9 +5728,10 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
@@ -6167,9 +6183,10 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6199,9 +6216,10 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6231,9 +6249,10 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6263,9 +6282,10 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
@@ -6532,9 +6552,10 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -6562,9 +6583,10 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -6592,9 +6614,10 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -6622,9 +6645,10 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -7076,9 +7100,10 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7108,9 +7133,10 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7140,9 +7166,10 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7172,9 +7199,10 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
@@ -7441,9 +7469,10 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -7471,9 +7500,10 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -7501,9 +7531,10 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -7531,9 +7562,10 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
@@ -8079,9 +8111,10 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat(<vscale x 16 x half>
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v4, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8118,9 +8151,10 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_commute(<vscale x 16
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vmv4r.v v4, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8150,9 +8184,10 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8182,9 +8217,10 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
@@ -8495,9 +8531,10 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat(<vscale x 16 x half>
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vmv4r.v v4, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -8527,9 +8564,10 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_commute(<vscale x 16
 ; ZVFHMIN-NEXT:    vmv4r.v v20, v12
 ; ZVFHMIN-NEXT:    vmv4r.v v4, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -8557,9 +8595,10 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -8587,9 +8626,10 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
@@ -10492,9 +10532,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
@@ -10619,9 +10660,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
@@ -10752,9 +10794,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v24, v24, a1
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
@@ -10877,9 +10920,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
@@ -12025,7 +12069,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
@@ -12033,6 +12077,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a1, 1
@@ -12154,11 +12199,12 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v24, a1, v0.t
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a1, 1
@@ -12304,9 +12350,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
@@ -12425,9 +12472,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v24, a1
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
index d4930bd2ae0396..80ada4670562d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -143,8 +143,9 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -169,8 +170,9 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -196,8 +198,9 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -222,8 +225,9 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
@@ -250,8 +254,9 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %v
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
@@ -278,8 +283,9 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
@@ -356,8 +362,9 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -382,8 +389,9 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -460,8 +468,9 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -486,8 +495,9 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -564,8 +574,9 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -590,8 +601,9 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -682,8 +694,9 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -708,8 +721,9 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
index 94b80075ac14c5..c92a79e49c1642 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
@@ -120,8 +120,9 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -147,8 +148,9 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32_commute(<vscale x 1 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -175,8 +177,9 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -252,8 +255,9 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -279,8 +283,9 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32_commute(<vscale x 2 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -307,8 +312,9 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -386,8 +392,9 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -413,8 +420,9 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_commute(<vscale x 4 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -441,8 +449,9 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -520,8 +529,9 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -547,8 +557,9 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_commute(<vscale x 8 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -575,8 +586,9 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
index 3fc58acd504e52..6ea58a4e768736 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
@@ -71,8 +71,9 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -100,8 +101,9 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32_commute(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -129,8 +131,9 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -209,8 +212,9 @@ define <vscale x 2 x float> @vfnmacc_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -238,8 +242,9 @@ define <vscale x 2 x float> @vfnmacc_vf_nxv2f32_commute(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -267,8 +272,9 @@ define <vscale x 2 x float> @vfnmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -349,8 +355,9 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -378,8 +385,9 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_commute(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -407,8 +415,9 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -489,8 +498,9 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -518,8 +528,9 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_commute(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -547,8 +558,9 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -643,8 +655,9 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32(<vscale x 16 x half> %a, half
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -672,8 +685,9 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_commute(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -701,8 +715,9 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
index 692a22bde48822..0afbe58038c76f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
@@ -69,8 +69,9 @@ define <vscale x 1 x float> @vfnmsac_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -96,8 +97,9 @@ define <vscale x 1 x float> @vfnmsac_vf_nxv1f32_commute(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -124,8 +126,9 @@ define <vscale x 1 x float> @vfnmsac_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
@@ -201,8 +204,9 @@ define <vscale x 2 x float> @vfnmsac_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -228,8 +232,9 @@ define <vscale x 2 x float> @vfnmsac_vf_nxv2f32_commute(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -256,8 +261,9 @@ define <vscale x 2 x float> @vfnmsac_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -335,8 +341,9 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -362,8 +369,9 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_commute(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -390,8 +398,9 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -469,8 +478,9 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -496,8 +506,9 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_commute(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -524,8 +535,9 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -617,8 +629,9 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32(<vscale x 16 x half> %a, half
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -644,8 +657,9 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_commute(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -672,8 +686,9 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index afeeb095b22769..8bf4772234a2cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -147,7 +147,7 @@ define <vscale x 4 x i32> @vrsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu.vv:
+; NOVLOPT-LABEL: vwaddu_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwaddu.vv v12, v8, v10
@@ -155,7 +155,7 @@ define <vscale x 4 x i64> @vwaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwaddu.vv:
+; VLOPT-LABEL: vwaddu_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwaddu.vv v12, v8, v10
@@ -168,7 +168,7 @@ define <vscale x 4 x i64> @vwaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 }
 
 define <vscale x 4 x i64> @vwaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu.vx:
+; NOVLOPT-LABEL: vwaddu_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwaddu.vx v12, v8, a0
@@ -176,7 +176,7 @@ define <vscale x 4 x i64> @vwaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwaddu.vx:
+; VLOPT-LABEL: vwaddu_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwaddu.vx v12, v8, a0
@@ -189,7 +189,7 @@ define <vscale x 4 x i64> @vwaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwsubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu.vv:
+; NOVLOPT-LABEL: vwsubu_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsubu.vv v12, v8, v10
@@ -197,7 +197,7 @@ define <vscale x 4 x i64> @vwsubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsubu.vv:
+; VLOPT-LABEL: vwsubu_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsubu.vv v12, v8, v10
@@ -210,7 +210,7 @@ define <vscale x 4 x i64> @vwsubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 }
 
 define <vscale x 4 x i64> @vwsubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu.vx:
+; NOVLOPT-LABEL: vwsubu_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsubu.vx v12, v8, a0
@@ -218,7 +218,7 @@ define <vscale x 4 x i64> @vwsubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsubu.vx:
+; VLOPT-LABEL: vwsubu_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsubu.vx v12, v8, a0
@@ -231,7 +231,7 @@ define <vscale x 4 x i64> @vwsubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd.vv:
+; NOVLOPT-LABEL: vwadd_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwadd.vv v12, v8, v10
@@ -239,7 +239,7 @@ define <vscale x 4 x i64> @vwadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwadd.vv:
+; VLOPT-LABEL: vwadd_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwadd.vv v12, v8, v10
@@ -252,7 +252,7 @@ define <vscale x 4 x i64> @vwadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 }
 
 define <vscale x 4 x i64> @vwadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd.vx:
+; NOVLOPT-LABEL: vwadd_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwadd.vx v12, v8, a0
@@ -260,7 +260,7 @@ define <vscale x 4 x i64> @vwadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwadd.vx:
+; VLOPT-LABEL: vwadd_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwadd.vx v12, v8, a0
@@ -273,7 +273,7 @@ define <vscale x 4 x i64> @vwadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub.vv:
+; NOVLOPT-LABEL: vwsub_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsub.vv v12, v8, v10
@@ -281,7 +281,7 @@ define <vscale x 4 x i64> @vwsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsub.vv:
+; VLOPT-LABEL: vwsub_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsub.vv v12, v8, v10
@@ -294,7 +294,7 @@ define <vscale x 4 x i64> @vwsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 }
 
 define <vscale x 4 x i64> @vwsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub.vx:
+; NOVLOPT-LABEL: vwsub_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsub.vx v12, v8, a0
@@ -302,7 +302,7 @@ define <vscale x 4 x i64> @vwsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsub.vx:
+; VLOPT-LABEL: vwsub_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsub.vx v12, v8, a0
@@ -315,7 +315,7 @@ define <vscale x 4 x i64> @vwsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwaddu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu.wv:
+; NOVLOPT-LABEL: vwaddu_wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwaddu.wv v8, v8, v12
@@ -323,7 +323,7 @@ define <vscale x 4 x i64> @vwaddu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwaddu.wv:
+; VLOPT-LABEL: vwaddu_wv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwaddu.wv v8, v8, v12
@@ -336,7 +336,7 @@ define <vscale x 4 x i64> @vwaddu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
 }
 
 define <vscale x 4 x i64> @vwaddu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu.wx:
+; NOVLOPT-LABEL: vwaddu_wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwaddu.wx v8, v8, a0
@@ -344,7 +344,7 @@ define <vscale x 4 x i64> @vwaddu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwaddu.wx:
+; VLOPT-LABEL: vwaddu_wx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwaddu.wx v8, v8, a0
@@ -357,7 +357,7 @@ define <vscale x 4 x i64> @vwaddu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwsubu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu.wv:
+; NOVLOPT-LABEL: vwsubu_wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsubu.wv v8, v8, v12
@@ -365,7 +365,7 @@ define <vscale x 4 x i64> @vwsubu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsubu.wv:
+; VLOPT-LABEL: vwsubu_wv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsubu.wv v8, v8, v12
@@ -378,7 +378,7 @@ define <vscale x 4 x i64> @vwsubu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
 }
 
 define <vscale x 4 x i64> @vwsubu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu.wx:
+; NOVLOPT-LABEL: vwsubu_wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsubu.wx v8, v8, a0
@@ -386,7 +386,7 @@ define <vscale x 4 x i64> @vwsubu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsubu.wx:
+; VLOPT-LABEL: vwsubu_wx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsubu.wx v8, v8, a0
@@ -399,7 +399,7 @@ define <vscale x 4 x i64> @vwsubu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwadd_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd.wv:
+; NOVLOPT-LABEL: vwadd_wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwadd.wv v8, v8, v12
@@ -407,7 +407,7 @@ define <vscale x 4 x i64> @vwadd_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwadd.wv:
+; VLOPT-LABEL: vwadd_wv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwadd.wv v8, v8, v12
@@ -420,7 +420,7 @@ define <vscale x 4 x i64> @vwadd_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b
 }
 
 define <vscale x 4 x i64> @vwadd_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd.wx:
+; NOVLOPT-LABEL: vwadd_wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwadd.wx v8, v8, a0
@@ -428,7 +428,7 @@ define <vscale x 4 x i64> @vwadd_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwadd.wx:
+; VLOPT-LABEL: vwadd_wx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwadd.wx v8, v8, a0
@@ -441,7 +441,7 @@ define <vscale x 4 x i64> @vwadd_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i64> @vwsub_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub.wv:
+; NOVLOPT-LABEL: vwsub_wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsub.wv v8, v8, v12
@@ -449,7 +449,7 @@ define <vscale x 4 x i64> @vwsub_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsub.wv:
+; VLOPT-LABEL: vwsub_wv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsub.wv v8, v8, v12
@@ -462,7 +462,7 @@ define <vscale x 4 x i64> @vwsub_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b
 }
 
 define <vscale x 4 x i64> @vwsub_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub.wx:
+; NOVLOPT-LABEL: vwsub_wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vwsub.wx v8, v8, a0
@@ -470,7 +470,7 @@ define <vscale x 4 x i64> @vwsub_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsub.wx:
+; VLOPT-LABEL: vwsub_wx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vwsub.wx v8, v8, a0
@@ -483,7 +483,7 @@ define <vscale x 4 x i64> @vwsub_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
 }
 
 define <vscale x 4 x i32> @vsext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsext.vf2:
+; NOVLOPT-LABEL: vsext_vf2:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vsext.vf2 v12, v8
@@ -491,7 +491,7 @@ define <vscale x 4 x i32> @vsext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vsext.vf2:
+; VLOPT-LABEL: vsext_vf2:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vsext.vf2 v12, v8
@@ -503,7 +503,7 @@ define <vscale x 4 x i32> @vsext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %
 }
 
 define <vscale x 4 x i32> @vsext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsext.vf4:
+; NOVLOPT-LABEL: vsext_vf4:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vsext.vf4 v12, v8
@@ -511,7 +511,7 @@ define <vscale x 4 x i32> @vsext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vsext.vf4:
+; VLOPT-LABEL: vsext_vf4:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vsext.vf4 v12, v8
@@ -523,7 +523,7 @@ define <vscale x 4 x i32> @vsext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b
 }
 
 define <vscale x 4 x i64> @vsext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsext.vf8:
+; NOVLOPT-LABEL: vsext_vf8:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
 ; NOVLOPT-NEXT:    vsext.vf8 v16, v8
@@ -531,7 +531,7 @@ define <vscale x 4 x i64> @vsext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v16, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vsext.vf8:
+; VLOPT-LABEL: vsext_vf8:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; VLOPT-NEXT:    vsext.vf8 v16, v8
@@ -543,7 +543,7 @@ define <vscale x 4 x i64> @vsext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b
 }
 
 define <vscale x 4 x i32> @vzext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vzext.vf2:
+; NOVLOPT-LABEL: vzext_vf2:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vzext.vf2 v12, v8
@@ -551,7 +551,7 @@ define <vscale x 4 x i32> @vzext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vzext.vf2:
+; VLOPT-LABEL: vzext_vf2:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vzext.vf2 v12, v8
@@ -563,7 +563,7 @@ define <vscale x 4 x i32> @vzext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %
 }
 
 define <vscale x 4 x i32> @vzext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vzext.vf4:
+; NOVLOPT-LABEL: vzext_vf4:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vzext.vf4 v12, v8
@@ -571,7 +571,7 @@ define <vscale x 4 x i32> @vzext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vzext.vf4:
+; VLOPT-LABEL: vzext_vf4:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; VLOPT-NEXT:    vzext.vf4 v12, v8
@@ -583,7 +583,7 @@ define <vscale x 4 x i32> @vzext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b
 }
 
 define <vscale x 4 x i64> @vzext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vzext.vf8:
+; NOVLOPT-LABEL: vzext_vf8:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
 ; NOVLOPT-NEXT:    vzext.vf8 v16, v8
@@ -591,7 +591,7 @@ define <vscale x 4 x i64> @vzext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v16, v12
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vzext.vf8:
+; VLOPT-LABEL: vzext_vf8:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; VLOPT-NEXT:    vzext.vf8 v16, v8
@@ -847,7 +847,7 @@ define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl)
 }
 
 define <vscale x 4 x i32> @vmv_v_i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
-; NOVLOPT-LABEL: vmv.v.i:
+; NOVLOPT-LABEL: vmv_v_i:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vmv.v.i v10, 5
@@ -855,7 +855,7 @@ define <vscale x 4 x i32> @vmv_v_i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vmv.v.i:
+; VLOPT-LABEL: vmv_v_i:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vmv.v.i v10, 5
@@ -867,7 +867,7 @@ define <vscale x 4 x i32> @vmv_v_i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 }
 
 define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
-; NOVLOPT-LABEL: vmv.v.x:
+; NOVLOPT-LABEL: vmv_v_x:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; NOVLOPT-NEXT:    vmv.v.x v10, a0
@@ -875,7 +875,7 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 ; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vmv.v.x:
+; VLOPT-LABEL: vmv_v_x:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vmv.v.x v10, a0
@@ -887,7 +887,7 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 }
 
 define <vscale x 4 x i32> @vwsll_vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsll.vi:
+; NOVLOPT-LABEL: vwsll_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; NOVLOPT-NEXT:    vwsll.vi v12, v8, 1
@@ -895,7 +895,7 @@ define <vscale x 4 x i32> @vwsll_vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b
 ; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
 ; NOVLOPT-NEXT:    ret
 ;
-; VLOPT-LABEL: vwsll.vi:
+; VLOPT-LABEL: vwsll_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; VLOPT-NEXT:    vwsll.vi v12, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
index 4f24da3d861afe..50a59073213b99 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
@@ -1,6 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
+
 
 define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) {
 ; CHECK-LABEL: vdot_lane_s32:
@@ -34,12 +37,20 @@ declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
   iXLen);
 
 define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
-; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vwadd.vv v10, v8, v9
-; CHECK-NEXT:    vnsrl.wv v8, v10, v12
-; CHECK-NEXT:    ret
+; NOVLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
+; NOVLOPT:       # %bb.0: # %entry
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
+; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; NOVLOPT-NEXT:    vnsrl.wv v8, v10, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
+; VLOPT:       # %bb.0: # %entry
+; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; VLOPT-NEXT:    vwadd.vv v10, v8, v9
+; VLOPT-NEXT:    vnsrl.wv v8, v10, v12
+; VLOPT-NEXT:    ret
 entry:
   %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
   %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
@@ -60,13 +71,22 @@ declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
   iXLen, iXLen);
 
 define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
-; CHECK-LABEL: vnclip:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vwadd.vv v10, v8, v9
-; CHECK-NEXT:    csrwi vxrm, 0
-; CHECK-NEXT:    vnclip.wv v8, v10, v12
-; CHECK-NEXT:    ret
+; NOVLOPT-LABEL: vnclip:
+; NOVLOPT:       # %bb.0: # %entry
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; NOVLOPT-NEXT:    vnclip.wv v8, v10, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vnclip:
+; VLOPT:       # %bb.0: # %entry
+; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; VLOPT-NEXT:    vwadd.vv v10, v8, v9
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vnclip.wv v8, v10, v12
+; VLOPT-NEXT:    ret
 entry:
   %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
   %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index cc02f54cda21a1..f65e708f5303cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vmax_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vmax.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index 45dc63442f157f..df1ad58e5ecbde 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vmaxu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index f1edd99aa0b0dc..0bf0638633aa45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vmin_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vmin.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 9929cc3b784556..2acebdf2e646d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vminu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vminu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
index 0adee0157c8f38..51026cbcb8c4bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -1440,10 +1440,11 @@ define <vscale x 8 x i64> @vmul_vadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va,
 ; CHECK-LABEL: vmul_vadd_vx_nxv8i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 21
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    li a0, 7
-; CHECK-NEXT:    vmadd.vx v8, a0, v16
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmadd.vx v8, a1, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 9d45b0cd078818..84c8321b5b9342 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -235,17 +235,17 @@ define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8
 define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
@@ -268,18 +268,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
 ; RV32-NEXT:    srli a3, a3, 2
 ; RV32-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vslidedown.vx v0, v0, a3
-; RV32-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
+; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v10
-; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a4, e8, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
 ; RV32-NEXT:    bltu a1, a2, .LBB12_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a2
 ; RV32-NEXT:  .LBB12_2:
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -302,18 +302,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
 ; RV64-NEXT:    srli a4, a2, 3
 ; RV64-NEXT:    vsetvli a7, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v13, a4
-; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a7, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v11
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a6, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
 ; RV64-NEXT:    bltu a5, a2, .LBB12_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a5, a2
 ; RV64-NEXT:  .LBB12_2:
-; RV64-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a6, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vmv1r.v v0, v13
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a5, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
 ; RV64-NEXT:    bltu a1, a3, .LBB12_4
 ; RV64-NEXT:  # %bb.3:
@@ -325,18 +325,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
 ; RV64-NEXT:    and a3, a5, a3
 ; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v12, a4
-; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
 ; RV64-NEXT:    bltu a1, a2, .LBB12_6
 ; RV64-NEXT:  # %bb.5:
 ; RV64-NEXT:    mv a1, a2
 ; RV64-NEXT:  .LBB12_6:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
@@ -529,19 +529,19 @@ define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
@@ -552,19 +552,19 @@ define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x
 define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -576,17 +576,17 @@ define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vsca
 define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v10, v8, v8
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v10, v8, v8
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -598,17 +598,18 @@ define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vsca
 define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vwadd.vv v12, v8, v8
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
@@ -754,18 +755,19 @@ define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
@@ -776,18 +778,19 @@ define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x
 define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -799,19 +802,19 @@ define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vsca
 define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v12, v10, 2
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v12, v10, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -823,18 +826,19 @@ define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vsca
 define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
@@ -845,18 +849,19 @@ define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x
 define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -868,17 +873,19 @@ define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vsc
 define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v8, v12, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -890,17 +897,18 @@ define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vsc
 define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
@@ -1004,18 +1012,19 @@ define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
@@ -1026,18 +1035,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -1049,19 +1059,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vsca
 define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v16, v10, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v16, v10, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -1073,18 +1083,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vsca
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
@@ -1095,18 +1106,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1118,19 +1130,19 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v16, v12, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1142,17 +1154,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
@@ -1163,17 +1176,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x
 define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1185,17 +1199,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1207,17 +1222,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vsc
 define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v16, v8, 0
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
@@ -1515,19 +1531,19 @@ define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x
 define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
@@ -1538,19 +1554,19 @@ define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x
 define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1562,17 +1578,17 @@ define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vsc
 define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v10, v8, v8
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v10, v8, v8
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1584,17 +1600,18 @@ define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vsc
 define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vwadd.vv v12, v8, v8
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
@@ -1698,18 +1715,19 @@ define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale
 define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
@@ -1720,18 +1738,19 @@ define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale
 define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1743,19 +1762,19 @@ define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vs
 define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v12, v10, 2
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v12, v10, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1767,18 +1786,19 @@ define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vs
 define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
@@ -1789,18 +1809,19 @@ define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale
 define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1812,17 +1833,19 @@ define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <v
 define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v8, v12, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1834,17 +1857,18 @@ define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <v
 define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
@@ -1948,18 +1972,19 @@ define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale
 define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
@@ -1970,18 +1995,19 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale
 define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -1993,19 +2019,19 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <v
 define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v16, v10, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v16, v10, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -2017,18 +2043,19 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <v
 define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
@@ -2039,18 +2066,19 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscal
 define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -2062,19 +2090,19 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v16, v12, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -2086,17 +2114,18 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
@@ -2107,17 +2136,18 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscal
 define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -2129,17 +2159,18 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -2151,17 +2182,18 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <
 define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v16, v8, 0
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
@@ -2191,18 +2223,19 @@ define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale
 define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
@@ -2213,18 +2246,19 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale
 define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2236,19 +2270,19 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <v
 define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v10, v8
 ; RV32-NEXT:    vsll.vi v16, v10, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v10, v8
 ; RV64-NEXT:    vsll.vi v16, v10, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2260,18 +2294,19 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <v
 define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
@@ -2282,18 +2317,19 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscal
 define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2305,19 +2341,19 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v16, v12, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v8
 ; RV64-NEXT:    vsll.vi v16, v12, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2329,17 +2365,18 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
@@ -2350,17 +2387,18 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscal
 define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2372,17 +2410,18 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v8
 ; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2394,17 +2433,18 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <
 define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v16, v8, 0
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index a1b753e3135a10..4d715c7031000e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -47,15 +47,17 @@ define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs,
 define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
@@ -66,19 +68,21 @@ define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscal
 define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
@@ -89,23 +93,25 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscal
 define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wi v11, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v11, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vnsrl.wi v12, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v12, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
@@ -168,17 +174,17 @@ define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs,
 define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v9
-; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
@@ -225,15 +231,17 @@ define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs
 define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
@@ -244,19 +252,21 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vsca
 define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wi v11, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vnsrl.wi v12, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
@@ -319,19 +329,19 @@ define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs
 define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
@@ -342,19 +352,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -366,17 +376,17 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v12, v10, v10
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v12, v10, v10
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -388,17 +398,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr
 define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vwadd.vv v12, v10, v10
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
@@ -419,8 +430,9 @@ define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsext.vf2 v12, v10, v0.t
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vwadd.vv v16, v12, v12
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
@@ -442,8 +454,9 @@ define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v12, v10, v0.t
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vwadd.vv v16, v12, v12
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
@@ -458,10 +471,10 @@ define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v12, v16, 0
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -484,10 +497,10 @@ define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, p
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vzext.vf2 v16, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v12, v16, 0
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -543,15 +556,17 @@ define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs
 define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
@@ -614,18 +629,19 @@ define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs
 define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
@@ -636,18 +652,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -659,19 +676,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v14, v12
 ; RV32-NEXT:    vsll.vi v12, v14, 2
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v14, v12
 ; RV64-NEXT:    vsll.vi v12, v14, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -683,18 +700,19 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
@@ -705,18 +723,19 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -728,17 +747,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v12, v16, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -750,17 +771,18 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr
 define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v12, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
@@ -859,18 +881,19 @@ define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs
 define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
@@ -881,18 +904,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -904,19 +928,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v18, v16
 ; RV32-NEXT:    vsll.vi v16, v18, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v18, v16
 ; RV64-NEXT:    vsll.vi v16, v18, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -928,18 +952,19 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
@@ -950,18 +975,19 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -973,19 +999,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v20, v16
 ; RV64-NEXT:    vsll.vi v16, v20, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -997,17 +1023,18 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
@@ -1018,17 +1045,18 @@ define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1040,17 +1068,18 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1062,17 +1091,18 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr
 define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v24, v16, 0
 ; RV32-NEXT:    vsll.vi v16, v24, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
@@ -1350,19 +1380,19 @@ define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptr
 define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
@@ -1373,19 +1403,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %bas
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v10
 ; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1397,17 +1427,17 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vwaddu.vv v12, v10, v10
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vwaddu.vv v12, v10, v10
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
@@ -1419,17 +1449,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr
 define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vwadd.vv v12, v10, v10
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v10
 ; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
@@ -1528,18 +1559,19 @@ define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %pt
 define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
@@ -1550,18 +1582,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %ba
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1573,19 +1606,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, pt
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v14, v12
 ; RV32-NEXT:    vsll.vi v12, v14, 2
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v14, v12
 ; RV64-NEXT:    vsll.vi v12, v14, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1597,18 +1630,19 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, pt
 define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
@@ -1619,18 +1653,19 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %b
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1642,17 +1677,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, p
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v12, v16, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1664,17 +1701,18 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, p
 define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v12, v12, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v16, v12
 ; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
@@ -1773,18 +1811,19 @@ define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %p
 define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
@@ -1795,18 +1834,19 @@ define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %b
 define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -1818,19 +1858,19 @@ define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, p
 define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v18, v16
 ; RV32-NEXT:    vsll.vi v16, v18, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v18, v16
 ; RV64-NEXT:    vsll.vi v16, v18, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
@@ -1842,18 +1882,19 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, p
 define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
@@ -1864,18 +1905,19 @@ define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -1887,19 +1929,19 @@ define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v20, v16
 ; RV64-NEXT:    vsll.vi v16, v20, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
@@ -1911,17 +1953,18 @@ define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
@@ -1932,17 +1975,18 @@ define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -1954,17 +1998,18 @@ define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
@@ -1976,17 +2021,18 @@ define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val,
 define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv6f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v24, v16, 0
 ; RV32-NEXT:    vsll.vi v16, v24, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
@@ -2015,18 +2061,19 @@ define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %p
 define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
@@ -2037,18 +2084,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %b
 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2060,19 +2108,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, p
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vzext.vf2 v18, v16
 ; RV32-NEXT:    vsll.vi v16, v18, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; RV64-NEXT:    vzext.vf2 v18, v16
 ; RV64-NEXT:    vsll.vi v16, v18, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2084,18 +2132,19 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, p
 define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
@@ -2106,18 +2155,19 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2129,19 +2179,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vzext.vf2 v20, v16
 ; RV32-NEXT:    vsll.vi v16, v20, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vzext.vf2 v20, v16
 ; RV64-NEXT:    vsll.vi v16, v20, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2153,17 +2203,18 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
@@ -2174,17 +2225,18 @@ define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %
 define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2196,17 +2248,18 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v16, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vzext.vf2 v24, v16
 ; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -2218,17 +2271,18 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val,
 define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v24, v16, 0
 ; RV32-NEXT:    vsll.vi v16, v24, 3
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
index 9304e8c58f90db..2ef96f4b3896fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vrem_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v9, v9, 1, v0.t
 ; CHECK-NEXT:    vrem.vv v8, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
index 1d4ee06cb9ac85..1f1ed4a1269acb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vremu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vremu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index f28153d427ebfe..027c81180d5f19 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -1125,6 +1125,7 @@ exit:
 define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v, i1 %cmp) {
 ; CHECK-LABEL: clobbered_forwarded_avl:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:  .LBB27_1: # %for.body
@@ -1132,7 +1133,9 @@ define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    bnez a1, .LBB27_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vadd.vv v10, v8, v8
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
index f5c46aec86b864..380835494ed17d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
@@ -9,9 +9,10 @@ declare <vscale x 8 x i7> @llvm.vp.shl.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i
 define <vscale x 8 x i7> @vsll_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: vsll_vx_nxv8i7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    li a0, 127
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
 ; CHECK-NEXT:    vsll.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 68ea51845392c9..382c8297473b78 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -944,9 +944,9 @@ define <vscale x 1 x i8> @vsra_vv_nxv1i8_sext_zext_mixed_trunc(<vscale x 1 x i8>
 ; CHECK-NEXT:    vsext.vf4 v9, v8
 ; CHECK-NEXT:    vzext.vf4 v10, v8
 ; CHECK-NEXT:    vsra.vv v8, v9, v10
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %sexted_va = sext <vscale x 1 x i8> %va to <vscale x 1 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
index ecce91982b14a4..cff8cc710d21f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
@@ -12,8 +12,10 @@ define <vscale x 8 x i7> @vsra_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    li a0, 127
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
 ; CHECK-NEXT:    vsra.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
index 9431b9abc91fbf..ff6771b643031f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
@@ -12,7 +12,9 @@ define <vscale x 8 x i7> @vsrl_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <v
 ; CHECK-NEXT:    li a2, 127
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-NEXT:    vand.vx v9, v9, a2, v0.t
 ; CHECK-NEXT:    vsrl.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
index bab66aad8c7e2a..613b58b0f1b88a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -62,8 +62,9 @@ define <vscale x 1 x i8> @vssub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale
 define <vscale x 1 x i8> @vssub_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vssub_vx_nxv1i8_commute:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vssub.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
index ca56145260f51c..8c729d7d9bfb6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -60,8 +60,9 @@ define <vscale x 1 x i8> @vssubu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale
 define <vscale x 1 x i8> @vssubu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vssubu_vx_nxv1i8_commute:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vssubu.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
index c30c4763dd46d5..bb3076b3a945e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
@@ -13,9 +13,10 @@ declare <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64>, <vscale x 2
 define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -34,9 +35,10 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale
 define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -55,15 +57,17 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale
 define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV64-NEXT:    ret
 ;
@@ -90,11 +94,12 @@ define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <
 define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -115,11 +120,12 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32
 define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -140,11 +146,12 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32
 define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -165,11 +172,12 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16
 define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -190,11 +198,12 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16
 define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -215,11 +224,12 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b
 define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -240,8 +250,9 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
 define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -265,9 +276,10 @@ declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4
 define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv4i32_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -286,9 +298,10 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale
 define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv4i32_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -307,15 +320,17 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale
 define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV64-NEXT:    ret
 ;
@@ -343,8 +358,9 @@ define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <
 define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -364,11 +380,12 @@ define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <
 define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -389,11 +406,12 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16
 define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -414,11 +432,12 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16
 define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -439,11 +458,12 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b
 define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -464,8 +484,9 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
 define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a, <vscale x 4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -490,9 +511,10 @@ declare <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16>, <vscale x 8
 define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv8i16_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -511,9 +533,10 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x
 define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vv_nxv8i16_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -532,15 +555,17 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x
 define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
 ; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-RV64-NEXT:    ret
 ;
@@ -568,8 +593,9 @@ define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <v
 define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i32_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -590,8 +616,9 @@ define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <v
 define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i16_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -611,11 +638,12 @@ define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <v
 define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -636,11 +664,12 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b,
 define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -661,8 +690,9 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b,
 define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a, <vscale x 8 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
 ; CHECK-NEXT:    ret
 ;

>From 6fc029059ce56ffc5f6d08a0e4261d21478bbc27 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 08:37:40 -0700
Subject: [PATCH 14/17] fixup! cleanup and respond to comments

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 45 +++++++---------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 48308b2f822637..d90acb5c2352ba 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -24,8 +24,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/InitializePasses.h"
 
-#include <algorithm>
-
 using namespace llvm;
 
 #define DEBUG_TYPE "riscv-vl-optimizer"
@@ -91,18 +89,12 @@ struct OperandInfo {
 
   unsigned Log2EEW;
 
-  // true if this is a vector register that only uses element 0 of the register.
-  bool IsScalar;
-
   OperandInfo(RISCVII::VLMUL EMUL, unsigned Log2EEW)
-      : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW),
-        IsScalar(false) {}
+      : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {
+  }
 
   OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
-      : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW), IsScalar(false) {}
-
-  OperandInfo(unsigned Log2EEW)
-      : S(State::Known), EMUL(std::nullopt), Log2EEW(Log2EEW), IsScalar(true) {}
+      : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {}
 
   OperandInfo() : S(State::Unknown) {}
 
@@ -112,11 +104,6 @@ struct OperandInfo {
   static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
     assert(A.isKnown() && B.isKnown() && "Both operands must be known");
 
-    if (A.IsScalar != B.IsScalar)
-      return false;
-    if (A.IsScalar && B.IsScalar)
-      return A.Log2EEW == B.Log2EEW;
-
     return A.Log2EEW == B.Log2EEW && A.EMUL->first == B.EMUL->first &&
            A.EMUL->second == B.EMUL->second;
   }
@@ -126,10 +113,6 @@ struct OperandInfo {
       OS << "Unknown";
       return;
     }
-    if (IsScalar) {
-      OS << "EEW:" << (1 << Log2EEW);
-      return;
-    }
     assert(EMUL && "Expected EMUL to have value");
     OS << "EMUL: ";
     if (EMUL->second)
@@ -191,16 +174,6 @@ getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) {
 } // end namespace RISCVVType
 } // end namespace llvm
 
-static bool isOpN(const MachineOperand &MO, unsigned OpN) {
-  const MachineInstr &MI = *MO.getParent();
-  bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
-
-  if (HasPassthru)
-    return MO.getOperandNo() == OpN + 1;
-
-  return MO.getOperandNo() == OpN;
-}
-
 /// Dest has EEW=SEW and EMUL=LMUL. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
 /// Source has EMUL=(EEW/SEW)*LMUL. LMUL and SEW comes from TSFlags of MI.
 static OperandInfo getIntegerExtensionOperandInfo(unsigned Factor,
@@ -257,7 +230,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     return {};
 
   bool IsMODef = MO.getOperandNo() == 0;
-  bool IsOp1 = isOpN(MO, 1);
 
   // All mask operands have EEW=1, EMUL=(EEW/SEW)*LMUL
   if (isMaskOperand(MI, MO, MRI))
@@ -500,6 +472,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWADD_WX:
   case RISCV::VWSUB_WV:
   case RISCV::VWSUB_WX: {
+    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
+    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
     RISCVII::VLMUL EMUL =
@@ -549,6 +523,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VNSRA_WI:
   case RISCV::VNSRA_WV:
   case RISCV::VNSRA_WX: {
+    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
+    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
     bool TwoTimes = IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
     RISCVII::VLMUL EMUL =
@@ -630,6 +606,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWMACCUS_VX: {
     // Operand 0 is destination as a def and Operand 1 is destination as a use
     // due to SSA.
+    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
+    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
     RISCVII::VLMUL EMUL =
@@ -701,6 +679,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VNCLIP_WI:
   case RISCV::VNCLIP_WV:
   case RISCV::VNCLIP_WX: {
+    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
+    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
     bool TwoTimes = !IsMODef && IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
     RISCVII::VLMUL EMUL =
@@ -864,7 +844,8 @@ static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) {
   case RISCV::VFREDUSUM_VS:
   case RISCV::VFWREDOSUM_VS:
   case RISCV::VFWREDUSUM_VS: {
-    return isOpN(MO, 2);
+    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI->getDesc());
+    return HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 3;
   }
   default:
     return false;
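
A side note on the cleanup in this fixup: once the IsScalar special case is dropped, operand compatibility reduces to comparing log2(EEW) plus the decoded (LMUL value, is-fractional) pair. Below is a minimal standalone sketch of that comparison. It is not part of the patch; the type and field names are simplified placeholders rather than the pass's actual OperandInfo.

#include <optional>
#include <utility>

// Simplified stand-in for the pass's OperandInfo after IsScalar is removed: a
// known operand carries a decoded EMUL pair (value, is-fractional) plus
// log2(EEW); an unknown operand carries neither. The real struct also keeps an
// explicit Known/Unknown state enum.
struct SketchOperandInfo {
  std::optional<std::pair<unsigned, bool>> EMUL; // e.g. mf2 represented as {2, true}
  unsigned Log2EEW = 0;
  bool isKnown() const { return EMUL.has_value(); }
};

// Mirrors the surviving EMULAndEEWAreEqual check: both operands must be known
// and must agree on both EEW and EMUL before VL reduction is considered.
static bool sameEMULAndEEW(const SketchOperandInfo &A,
                           const SketchOperandInfo &B) {
  return A.isKnown() && B.isKnown() && A.Log2EEW == B.Log2EEW &&
         *A.EMUL == *B.EMUL;
}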

>From c2d007086e45d06aee7804db057d0a400a6474eb Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 18:17:02 -0700
Subject: [PATCH 15/17] fixup! more reorganization

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    | 413 +++++-------------
 llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll |  48 +-
 2 files changed, 122 insertions(+), 339 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d90acb5c2352ba..16d5d16b5fbd2a 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -247,161 +247,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     llvm_unreachable("Configuration setting instructions do not read or write "
                      "vector registers");
 
-  // 7.8. Vector Load/Store Segment Instructions
-  // 7.8.1. Vector Unit-Stride Segment Loads and Stores
-  // v.*seg<nf>e<eew>.*
-  // EEW=eew, EMUL=LMUL
-  case RISCV::VLSEG2E8_V:
-  case RISCV::VLSEG2E8FF_V:
-  case RISCV::VLSEG3E8_V:
-  case RISCV::VLSEG3E8FF_V:
-  case RISCV::VLSEG4E8_V:
-  case RISCV::VLSEG4E8FF_V:
-  case RISCV::VLSEG5E8_V:
-  case RISCV::VLSEG5E8FF_V:
-  case RISCV::VLSEG6E8_V:
-  case RISCV::VLSEG6E8FF_V:
-  case RISCV::VLSEG7E8_V:
-  case RISCV::VLSEG7E8FF_V:
-  case RISCV::VLSEG8E8_V:
-  case RISCV::VLSEG8E8FF_V:
-  case RISCV::VSSEG2E8_V:
-  case RISCV::VSSEG3E8_V:
-  case RISCV::VSSEG4E8_V:
-  case RISCV::VSSEG5E8_V:
-  case RISCV::VSSEG6E8_V:
-  case RISCV::VSSEG7E8_V:
-  case RISCV::VSSEG8E8_V:
-    return OperandInfo(MIVLMul, 3);
-  case RISCV::VLSEG2E16_V:
-  case RISCV::VLSEG2E16FF_V:
-  case RISCV::VLSEG3E16_V:
-  case RISCV::VLSEG3E16FF_V:
-  case RISCV::VLSEG4E16_V:
-  case RISCV::VLSEG4E16FF_V:
-  case RISCV::VLSEG5E16_V:
-  case RISCV::VLSEG5E16FF_V:
-  case RISCV::VLSEG6E16_V:
-  case RISCV::VLSEG6E16FF_V:
-  case RISCV::VLSEG7E16_V:
-  case RISCV::VLSEG7E16FF_V:
-  case RISCV::VLSEG8E16_V:
-  case RISCV::VLSEG8E16FF_V:
-  case RISCV::VSSEG2E16_V:
-  case RISCV::VSSEG3E16_V:
-  case RISCV::VSSEG4E16_V:
-  case RISCV::VSSEG5E16_V:
-  case RISCV::VSSEG6E16_V:
-  case RISCV::VSSEG7E16_V:
-  case RISCV::VSSEG8E16_V:
-    return OperandInfo(MIVLMul, 4);
-  case RISCV::VLSEG2E32_V:
-  case RISCV::VLSEG2E32FF_V:
-  case RISCV::VLSEG3E32_V:
-  case RISCV::VLSEG3E32FF_V:
-  case RISCV::VLSEG4E32_V:
-  case RISCV::VLSEG4E32FF_V:
-  case RISCV::VLSEG5E32_V:
-  case RISCV::VLSEG5E32FF_V:
-  case RISCV::VLSEG6E32_V:
-  case RISCV::VLSEG6E32FF_V:
-  case RISCV::VLSEG7E32_V:
-  case RISCV::VLSEG7E32FF_V:
-  case RISCV::VLSEG8E32_V:
-  case RISCV::VLSEG8E32FF_V:
-  case RISCV::VSSEG2E32_V:
-  case RISCV::VSSEG3E32_V:
-  case RISCV::VSSEG4E32_V:
-  case RISCV::VSSEG5E32_V:
-  case RISCV::VSSEG6E32_V:
-  case RISCV::VSSEG7E32_V:
-  case RISCV::VSSEG8E32_V:
-    return OperandInfo(MIVLMul, 5);
-  case RISCV::VLSEG2E64_V:
-  case RISCV::VLSEG2E64FF_V:
-  case RISCV::VLSEG3E64_V:
-  case RISCV::VLSEG3E64FF_V:
-  case RISCV::VLSEG4E64_V:
-  case RISCV::VLSEG4E64FF_V:
-  case RISCV::VLSEG5E64_V:
-  case RISCV::VLSEG5E64FF_V:
-  case RISCV::VLSEG6E64_V:
-  case RISCV::VLSEG6E64FF_V:
-  case RISCV::VLSEG7E64_V:
-  case RISCV::VLSEG7E64FF_V:
-  case RISCV::VLSEG8E64_V:
-  case RISCV::VLSEG8E64FF_V:
-  case RISCV::VSSEG2E64_V:
-  case RISCV::VSSEG3E64_V:
-  case RISCV::VSSEG4E64_V:
-  case RISCV::VSSEG5E64_V:
-  case RISCV::VSSEG6E64_V:
-  case RISCV::VSSEG7E64_V:
-  case RISCV::VSSEG8E64_V:
-    return OperandInfo(MIVLMul, 6);
-
-  // 7.8.2. Vector Strided Segment Loads and Stores
-  case RISCV::VLSSEG2E8_V:
-  case RISCV::VLSSEG3E8_V:
-  case RISCV::VLSSEG4E8_V:
-  case RISCV::VLSSEG5E8_V:
-  case RISCV::VLSSEG6E8_V:
-  case RISCV::VLSSEG7E8_V:
-  case RISCV::VLSSEG8E8_V:
-  case RISCV::VSSSEG2E8_V:
-  case RISCV::VSSSEG3E8_V:
-  case RISCV::VSSSEG4E8_V:
-  case RISCV::VSSSEG5E8_V:
-  case RISCV::VSSSEG6E8_V:
-  case RISCV::VSSSEG7E8_V:
-  case RISCV::VSSSEG8E8_V:
-    return OperandInfo(MIVLMul, 3);
-  case RISCV::VLSSEG2E16_V:
-  case RISCV::VLSSEG3E16_V:
-  case RISCV::VLSSEG4E16_V:
-  case RISCV::VLSSEG5E16_V:
-  case RISCV::VLSSEG6E16_V:
-  case RISCV::VLSSEG7E16_V:
-  case RISCV::VLSSEG8E16_V:
-  case RISCV::VSSSEG2E16_V:
-  case RISCV::VSSSEG3E16_V:
-  case RISCV::VSSSEG4E16_V:
-  case RISCV::VSSSEG5E16_V:
-  case RISCV::VSSSEG6E16_V:
-  case RISCV::VSSSEG7E16_V:
-  case RISCV::VSSSEG8E16_V:
-    return OperandInfo(MIVLMul, 4);
-  case RISCV::VLSSEG2E32_V:
-  case RISCV::VLSSEG3E32_V:
-  case RISCV::VLSSEG4E32_V:
-  case RISCV::VLSSEG5E32_V:
-  case RISCV::VLSSEG6E32_V:
-  case RISCV::VLSSEG7E32_V:
-  case RISCV::VLSSEG8E32_V:
-  case RISCV::VSSSEG2E32_V:
-  case RISCV::VSSSEG3E32_V:
-  case RISCV::VSSSEG4E32_V:
-  case RISCV::VSSSEG5E32_V:
-  case RISCV::VSSSEG6E32_V:
-  case RISCV::VSSSEG7E32_V:
-  case RISCV::VSSSEG8E32_V:
-    return OperandInfo(MIVLMul, 5);
-  case RISCV::VLSSEG2E64_V:
-  case RISCV::VLSSEG3E64_V:
-  case RISCV::VLSSEG4E64_V:
-  case RISCV::VLSSEG5E64_V:
-  case RISCV::VLSSEG6E64_V:
-  case RISCV::VLSSEG7E64_V:
-  case RISCV::VLSSEG8E64_V:
-  case RISCV::VSSSEG2E64_V:
-  case RISCV::VSSSEG3E64_V:
-  case RISCV::VSSSEG4E64_V:
-  case RISCV::VSSSEG5E64_V:
-  case RISCV::VSSSEG6E64_V:
-  case RISCV::VSSSEG7E64_V:
-  case RISCV::VSSSEG8E64_V:
-    return OperandInfo(MIVLMul, 6);
-
   // 7.9. Vector Load/Store Whole Register Instructions
   // EMUL=nr. EEW=eew. Since in-register byte layouts are idential to in-memory
   // byte layouts, the same data is writen to destination register regardless
@@ -445,52 +290,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSUB_VX:
   case RISCV::VRSUB_VI:
   case RISCV::VRSUB_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 11.2. Vector Widening Integer Add/Subtract
-  // Def uses EEW=2*SEW and EMUL=2*LMUL. Operands use EEW=SEW and EMUL=LMUL.
-  case RISCV::VWADDU_VV:
-  case RISCV::VWADDU_VX:
-  case RISCV::VWSUBU_VV:
-  case RISCV::VWSUBU_VX:
-  case RISCV::VWADD_VV:
-  case RISCV::VWADD_VX:
-  case RISCV::VWSUB_VV:
-  case RISCV::VWSUB_VX:
-  case RISCV::VWSLL_VI: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // Def and Op1 uses EEW=2*SEW and EMUL=2*LMUL. Op2 uses EEW=SEW and EMUL=LMUL
-  case RISCV::VWADDU_WV:
-  case RISCV::VWADDU_WX:
-  case RISCV::VWSUBU_WV:
-  case RISCV::VWSUBU_WX:
-  case RISCV::VWADD_WV:
-  case RISCV::VWADD_WX:
-  case RISCV::VWSUB_WV:
-  case RISCV::VWSUB_WX: {
-    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
-    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
-    bool TwoTimes = IsMODef || IsOp1;
-    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // 11.3. Vector Integer Extension
-  case RISCV::VZEXT_VF2:
-  case RISCV::VSEXT_VF2:
-    return getIntegerExtensionOperandInfo(2, MI, MO);
-  case RISCV::VZEXT_VF4:
-  case RISCV::VSEXT_VF4:
-    return getIntegerExtensionOperandInfo(4, MI, MO);
-  case RISCV::VZEXT_VF8:
-  case RISCV::VSEXT_VF8:
-    return getIntegerExtensionOperandInfo(8, MI, MO);
-
   // 11.5. Vector Bitwise Logical Instructions
   // 11.6. Vector Single-Width Shift Instructions
   // EEW=SEW. EMUL=LMUL.
@@ -512,26 +311,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSRA_VI:
   case RISCV::VSRA_VV:
   case RISCV::VSRA_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 11.7. Vector Narrowing Integer Right Shift Instructions
-  // Destination EEW=SEW and EMUL=LMUL, Op 1 has EEW=2*SEW EMUL=2*LMUL. Op2 has
-  // EEW=SEW EMUL=LMUL.
-  case RISCV::VNSRL_WX:
-  case RISCV::VNSRL_WI:
-  case RISCV::VNSRL_WV:
-  case RISCV::VNSRA_WI:
-  case RISCV::VNSRA_WV:
-  case RISCV::VNSRA_WX: {
-    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
-    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
-    bool TwoTimes = IsOp1;
-    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-
   // 11.9. Vector Integer Min/Max Instructions
   // EEW=SEW. EMUL=LMUL.
   case RISCV::VMINU_VV:
@@ -542,8 +321,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMAXU_VX:
   case RISCV::VMAX_VV:
   case RISCV::VMAX_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
   // 11.10. Vector Single-Width Integer Multiply Instructions
   // Source and Dest EEW=SEW and EMUL=LMUL.
   case RISCV::VMUL_VV:
@@ -553,9 +330,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMULHU_VV:
   case RISCV::VMULHU_VX:
   case RISCV::VMULHSU_VV:
-  case RISCV::VMULHSU_VX: {
-    return OperandInfo(MIVLMul, MILog2SEW);
-  }
+  case RISCV::VMULHSU_VX:
   // 11.11. Vector Integer Divide Instructions
   // EEW=SEW. EMUL=LMUL.
   case RISCV::VDIVU_VV:
@@ -566,21 +341,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VREMU_VX:
   case RISCV::VREM_VV:
   case RISCV::VREM_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 11.12. Vector Widening Integer Multiply Instructions
-  // Source and Destination EMUL=LMUL. Destination EEW=2*SEW. Source EEW=SEW.
-  case RISCV::VWMUL_VV:
-  case RISCV::VWMUL_VX:
-  case RISCV::VWMULSU_VV:
-  case RISCV::VWMULSU_VX:
-  case RISCV::VWMULU_VV:
-  case RISCV::VWMULU_VX: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
   // 11.13. Vector Single-Width Integer Multiply-Add Instructions
   // EEW=SEW. EMUL=LMUL.
   case RISCV::VMACC_VV:
@@ -591,39 +351,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMADD_VX:
   case RISCV::VNMSUB_VV:
   case RISCV::VNMSUB_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 11.14. Vector Widening Integer Multiply-Add Instructions
-  // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
-  // Even though the add is a 2*SEW addition, the operands of the add are the
-  // Dest which is 2*SEW and the result of the multiply which is 2*SEW.
-  case RISCV::VWMACCU_VV:
-  case RISCV::VWMACCU_VX:
-  case RISCV::VWMACC_VV:
-  case RISCV::VWMACC_VX:
-  case RISCV::VWMACCSU_VV:
-  case RISCV::VWMACCSU_VX:
-  case RISCV::VWMACCUS_VX: {
-    // Operand 0 is destination as a def and Operand 1 is destination as a use
-    // due to SSA.
-    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
-    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
-    bool TwoTimes = IsMODef || IsOp1;
-    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-  // 11.15. Vector Integer Merge Instructions
-  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
-  // (EEW/SEW)*LMUL.
-  case RISCV::VMERGE_VIM:
-  case RISCV::VMERGE_VVM:
-  case RISCV::VMERGE_VXM:
-    if (MO.getOperandNo() == 3)
-      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
-    return OperandInfo(MIVLMul, MILog2SEW);
-
   // 11.16. Vector Integer Move Instructions
   // 12. Vector Fixed-Point Arithmetic Instructions
   // 12.1. Vector Single-Width Saturating Add and Subtract
@@ -650,17 +377,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VASUBU_VX:
   case RISCV::VASUB_VV:
   case RISCV::VASUB_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
-  // Destination EEW=2*SEW and EMUL=2*EMUL. Source EEW=SEW and EMUL=LMUL.
-  case RISCV::VSMUL_VV:
-  case RISCV::VSMUL_VX: {
-    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
   // 12.4. Vector Single-Width Scaling Shift Instructions
   // EEW=SEW. EMUL=LMUL.
   case RISCV::VSSRL_VI:
@@ -669,25 +385,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VSSRA_VI:
   case RISCV::VSSRA_VV:
   case RISCV::VSSRA_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
-  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
-  // Destination and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=2*SEW and EMUL=2*LMUL
-  case RISCV::VNCLIPU_WI:
-  case RISCV::VNCLIPU_WV:
-  case RISCV::VNCLIPU_WX:
-  case RISCV::VNCLIP_WI:
-  case RISCV::VNCLIP_WV:
-  case RISCV::VNCLIP_WX: {
-    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
-    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
-    bool TwoTimes = !IsMODef && IsOp1;
-    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
-    RISCVII::VLMUL EMUL =
-        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
-    return OperandInfo(EMUL, Log2EEW);
-  }
-
   // 16. Vector Permutation Instructions
   // 16.1. Integer Scalar Move Instructions
   // 16.2. Floating-Point Scalar Move Instructions
@@ -696,8 +393,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMV_S_X:
   case RISCV::VFMV_F_S:
   case RISCV::VFMV_S_F:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
   // 16.3. Vector Slide Instructions
   // EMUL=LMUL. EEW=SEW.
   case RISCV::VSLIDEUP_VI:
@@ -708,19 +403,119 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VFSLIDE1UP_VF:
   case RISCV::VSLIDE1DOWN_VX:
   case RISCV::VFSLIDE1DOWN_VF:
-    return OperandInfo(MIVLMul, MILog2SEW);
-
   // 16.4. Vector Register Gather Instructions
   // EMUL=LMUL. EEW=SEW. For mask operand, EMUL=1 and EEW=1.
   case RISCV::VRGATHER_VI:
   case RISCV::VRGATHER_VV:
   case RISCV::VRGATHER_VX:
-    return OperandInfo(MIVLMul, MILog2SEW);
   // 16.5. Vector Compress Instruction
   // EMUL=LMUL. EEW=SEW.
   case RISCV::VCOMPRESS_VM:
     return OperandInfo(MIVLMul, MILog2SEW);
 
+  // 11.2. Vector Widening Integer Add/Subtract
+  // Def uses EEW=2*SEW and EMUL=2*LMUL. Operands use EEW=SEW and EMUL=LMUL.
+  case RISCV::VWADDU_VV:
+  case RISCV::VWADDU_VX:
+  case RISCV::VWSUBU_VV:
+  case RISCV::VWSUBU_VX:
+  case RISCV::VWADD_VV:
+  case RISCV::VWADD_VX:
+  case RISCV::VWSUB_VV:
+  case RISCV::VWSUB_VX:
+  case RISCV::VWSLL_VI:
+  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+  // Destination EEW=2*SEW and EMUL=2*EMUL. Source EEW=SEW and EMUL=LMUL.
+  case RISCV::VSMUL_VV:
+  case RISCV::VSMUL_VX:
+  // 11.12. Vector Widening Integer Multiply Instructions
+  // Source and Destination EMUL=LMUL. Destination EEW=2*SEW. Source EEW=SEW.
+  case RISCV::VWMUL_VV:
+  case RISCV::VWMUL_VX:
+  case RISCV::VWMULSU_VV:
+  case RISCV::VWMULSU_VX:
+  case RISCV::VWMULU_VV:
+  case RISCV::VWMULU_VX: {
+    unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL =
+        IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+
+  // Def and Op1 uses EEW=2*SEW and EMUL=2*LMUL. Op2 uses EEW=SEW and EMUL=LMUL
+  case RISCV::VWADDU_WV:
+  case RISCV::VWADDU_WX:
+  case RISCV::VWSUBU_WV:
+  case RISCV::VWSUBU_WX:
+  case RISCV::VWADD_WV:
+  case RISCV::VWADD_WX:
+  case RISCV::VWSUB_WV:
+  case RISCV::VWSUB_WX:
+  // 11.14. Vector Widening Integer Multiply-Add Instructions
+  // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  // Even though the add is a 2*SEW addition, the operands of the add are the
+  // Dest which is 2*SEW and the result of the multiply which is 2*SEW.
+  case RISCV::VWMACCU_VV:
+  case RISCV::VWMACCU_VX:
+  case RISCV::VWMACC_VV:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCSU_VV:
+  case RISCV::VWMACCSU_VX:
+  case RISCV::VWMACCUS_VX: {
+    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
+    bool TwoTimes = IsMODef || IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+
+  // 11.3. Vector Integer Extension
+  case RISCV::VZEXT_VF2:
+  case RISCV::VSEXT_VF2:
+    return getIntegerExtensionOperandInfo(2, MI, MO);
+  case RISCV::VZEXT_VF4:
+  case RISCV::VSEXT_VF4:
+    return getIntegerExtensionOperandInfo(4, MI, MO);
+  case RISCV::VZEXT_VF8:
+  case RISCV::VSEXT_VF8:
+    return getIntegerExtensionOperandInfo(8, MI, MO);
+
+  // 11.7. Vector Narrowing Integer Right Shift Instructions
+  // Destination EEW=SEW and EMUL=LMUL, Op 1 has EEW=2*SEW EMUL=2*LMUL. Op2 has
+  // EEW=SEW EMUL=LMUL.
+  case RISCV::VNSRL_WX:
+  case RISCV::VNSRL_WI:
+  case RISCV::VNSRL_WV:
+  case RISCV::VNSRA_WI:
+  case RISCV::VNSRA_WV:
+  case RISCV::VNSRA_WX:
+  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+  // Destination and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=2*SEW and EMUL=2*LMUL
+  case RISCV::VNCLIPU_WI:
+  case RISCV::VNCLIPU_WV:
+  case RISCV::VNCLIPU_WX:
+  case RISCV::VNCLIP_WI:
+  case RISCV::VNCLIP_WV:
+  case RISCV::VNCLIP_WX: {
+    bool IsOp1 = HasPassthru ? MO.getOperandNo() == 1 : MO.getOperandNo() == 2;
+    bool TwoTimes = IsOp1;
+    unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
+    RISCVII::VLMUL EMUL =
+        TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
+    return OperandInfo(EMUL, Log2EEW);
+  }
+
+  // 11.15. Vector Integer Merge Instructions
+  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
+  // (EEW/SEW)*LMUL.
+  case RISCV::VMERGE_VIM:
+  case RISCV::VMERGE_VVM:
+  case RISCV::VMERGE_VXM:
+    if (MO.getOperandNo() == 3)
+      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+    return OperandInfo(MIVLMul, MILog2SEW);
+
   default:
     return {};
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
index 50a59073213b99..a7abd90ea73913 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
@@ -37,20 +37,13 @@ declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
   iXLen);
 
 define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
-; NOVLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
-; NOVLOPT:       # %bb.0: # %entry
-; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
-; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; NOVLOPT-NEXT:    vnsrl.wv v8, v10, v12
-; NOVLOPT-NEXT:    ret
-;
-; VLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
-; VLOPT:       # %bb.0: # %entry
-; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; VLOPT-NEXT:    vwadd.vv v10, v8, v9
-; VLOPT-NEXT:    vnsrl.wv v8, v10, v12
-; VLOPT-NEXT:    ret
+; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v10, v12
+; CHECK-NEXT:    ret
 entry:
   %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
   %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
@@ -71,22 +64,14 @@ declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
   iXLen, iXLen);
 
 define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
-; NOVLOPT-LABEL: vnclip:
-; NOVLOPT:       # %bb.0: # %entry
-; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
-; NOVLOPT-NEXT:    csrwi vxrm, 0
-; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; NOVLOPT-NEXT:    vnclip.wv v8, v10, v12
-; NOVLOPT-NEXT:    ret
-;
-; VLOPT-LABEL: vnclip:
-; VLOPT:       # %bb.0: # %entry
-; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; VLOPT-NEXT:    vwadd.vv v10, v8, v9
-; VLOPT-NEXT:    csrwi vxrm, 0
-; VLOPT-NEXT:    vnclip.wv v8, v10, v12
-; VLOPT-NEXT:    ret
+; CHECK-LABEL: vnclip:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9
+; CHECK-NEXT:    csrwi vxrm, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vnclip.wv v8, v10, v12
+; CHECK-NEXT:    ret
 entry:
   %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
   %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
@@ -99,3 +84,6 @@ entry:
 
   ret <vscale x 2 x i16> %x
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NOVLOPT: {{.*}}
+; VLOPT: {{.*}}
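
One detail worth spelling out for the widening cases grouped together in this reorganization: because the pass works in log2 space, "EEW = 2*SEW" is simply MILog2SEW + 1. A tiny standalone example of that bookkeeping follows; it is an illustration only and is not taken from the patch.

#include <cassert>

// Widening instructions double the effective element width on the destination
// (and, for the .wv/.wx forms, on the wide source operand). Doubling a power
// of two is a +1 on its log2 exponent.
static unsigned widenedLog2EEW(unsigned MILog2SEW, bool TwoTimes) {
  return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
}

int main() {
  // SEW = 32 (log2 = 5): the widening destination uses EEW = 64 (log2 = 6).
  assert(widenedLog2EEW(5, /*TwoTimes=*/true) == 6);
  assert((1u << widenedLog2EEW(5, true)) == 64);
  // A narrow source keeps EEW = SEW = 32.
  assert((1u << widenedLog2EEW(5, false)) == 32);
  return 0;
}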

>From 1294ffdb364e41eda79a752b1948299ab4286f9a Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 1 Oct 2024 08:42:31 -0700
Subject: [PATCH 16/17] fixup! simplify getOperandInfo some more

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   | 55 +++--------------
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 65 ++++++++++++++++++++
 2 files changed, 74 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 16d5d16b5fbd2a..dbe8bde3b45b52 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -247,40 +247,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     llvm_unreachable("Configuration setting instructions do not read or write "
                      "vector registers");
 
-  // 7.9. Vector Load/Store Whole Register Instructions
-  // EMUL=nr. EEW=eew. Since in-register byte layouts are idential to in-memory
-  // byte layouts, the same data is writen to destination register regardless
-  // of EEW. eew is just a hint to the hardware and has not functional impact.
-  // Therefore, it is be okay if we ignore eew and always use the same EEW to
-  // create more optimization opportunities.
-  // FIXME: Instead of using any SEW, we really ought to return the SEW in the
-  // instruction and add a field to OperandInfo that says the SEW is just a hint
-  // so that this optimization can use any sew to construct a ratio.
-  case RISCV::VL1RE8_V:
-  case RISCV::VL1RE16_V:
-  case RISCV::VL1RE32_V:
-  case RISCV::VL1RE64_V:
-  case RISCV::VS1R_V:
-    return OperandInfo(RISCVII::VLMUL::LMUL_1, 0);
-  case RISCV::VL2RE8_V:
-  case RISCV::VL2RE16_V:
-  case RISCV::VL2RE32_V:
-  case RISCV::VL2RE64_V:
-  case RISCV::VS2R_V:
-    return OperandInfo(RISCVII::VLMUL::LMUL_2, 0);
-  case RISCV::VL4RE8_V:
-  case RISCV::VL4RE16_V:
-  case RISCV::VL4RE32_V:
-  case RISCV::VL4RE64_V:
-  case RISCV::VS4R_V:
-    return OperandInfo(RISCVII::VLMUL::LMUL_4, 0);
-  case RISCV::VL8RE8_V:
-  case RISCV::VL8RE16_V:
-  case RISCV::VL8RE32_V:
-  case RISCV::VL8RE64_V:
-  case RISCV::VS8R_V:
-    return OperandInfo(RISCVII::VLMUL::LMUL_8, 0);
-
   // 11. Vector Integer Arithmetic Instructions
   // 11.1. Vector Single-Width Integer Add and Subtract
   case RISCV::VADD_VI:
@@ -351,6 +317,12 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VMADD_VX:
   case RISCV::VNMSUB_VV:
   case RISCV::VNMSUB_VX:
+  // 11.15. Vector Integer Merge Instructions
+  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
+  // (EEW/SEW)*LMUL. Mask operand is handled before this switch.
+  case RISCV::VMERGE_VIM:
+  case RISCV::VMERGE_VVM:
+  case RISCV::VMERGE_VXM:
   // 11.16. Vector Integer Move Instructions
   // 12. Vector Fixed-Point Arithmetic Instructions
   // 12.1. Vector Single-Width Saturating Add and Subtract
@@ -506,16 +478,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     return OperandInfo(EMUL, Log2EEW);
   }
 
-  // 11.15. Vector Integer Merge Instructions
-  // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
-  // (EEW/SEW)*LMUL.
-  case RISCV::VMERGE_VIM:
-  case RISCV::VMERGE_VVM:
-  case RISCV::VMERGE_VXM:
-    if (MO.getOperandNo() == 3)
-      return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
-    return OperandInfo(MIVLMul, MILog2SEW);
-
   default:
     return {};
   }
@@ -700,11 +662,11 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   // TODO: Use a better approach than a white-list, such as adding
   // properties to instructions using something like TSFlags.
   if (!isSupportedInstr(MI)) {
-    LLVM_DEBUG(dbgs() << "  Not a candidate due to unsupported instruction\n");
+    LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction\n");
     return false;
   }
 
-  LLVM_DEBUG(dbgs() << "  Found a candidate for VL reduction: " << MI << "\n");
+  LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
   return true;
 }
 
@@ -819,6 +781,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
       continue;
 
     // All our checks passed. We can reduce VL.
+    LLVM_DEBUG(dbgs() << "    Reducing VL for: " << MI << "\n");
     unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
     MachineOperand &VLOp = MI.getOperand(VLOpNum);
     VLOp.ChangeToRegister(*CommonVL, false);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 8bf4772234a2cf..107252338829bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -906,3 +906,68 @@ define <vscale x 4 x i32> @vwsll_vi(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
+
+; Test getOperandInfo
+
+define <vscale x 1 x i8> @vmerge_vim(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i1> %m, iXLen %vl) {
+; NOVLOPT-LABEL: vmerge_vim:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e8, mf8, tu, ma
+; NOVLOPT-NEXT:    vmv.v.x v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; NOVLOPT-NEXT:    vmerge.vim v8, v8, 2, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmerge_vim:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
+; VLOPT-NEXT:    vmv.v.x v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; VLOPT-NEXT:    vmerge.vim v8, v8, 2, v0
+; VLOPT-NEXT:    ret
+  %2 = call <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8(<vscale x 1 x i8> %a, i8 %b, iXLen -1)
+  %3 = call <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %2, i8 2, <vscale x 1 x i1> %m, iXLen %vl)
+  ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @vmerge_vxm(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i1> %m, iXLen %vl) {
+; NOVLOPT-LABEL: vmerge_vxm:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e8, mf8, tu, ma
+; NOVLOPT-NEXT:    vmv.v.x v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; NOVLOPT-NEXT:    vmerge.vxm v8, v8, a0, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmerge_vxm:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
+; VLOPT-NEXT:    vmv.v.x v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; VLOPT-NEXT:    vmerge.vxm v8, v8, a0, v0
+; VLOPT-NEXT:    ret
+  %2 = call <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8(<vscale x 1 x i8> %a, i8 %b, iXLen -1)
+  %3 = call <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %2, i8 %b, <vscale x 1 x i1> %m, iXLen %vl)
+  ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @vmerge_vvm(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
+; NOVLOPT-LABEL: vmerge_vvm:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e8, mf8, tu, ma
+; NOVLOPT-NEXT:    vmv.v.x v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; NOVLOPT-NEXT:    vmerge.vvm v8, v8, v9, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmerge_vvm:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
+; VLOPT-NEXT:    vmv.v.x v8, a0
+; VLOPT-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; VLOPT-NEXT:    vmerge.vvm v8, v8, v9, v0
+; VLOPT-NEXT:    ret
+  %2 = call <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8(<vscale x 1 x i8> %a, i8 %b, iXLen -1)
+  %3 = call <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %2, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl)
+  ret <vscale x 1 x i8> %3
+}
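
The new vmerge tests exercise the mask-operand rule cited in the 11.15 comment: the mask has EEW=1 and EMUL=(EEW/SEW)*LMUL. The sketch below shows that ratio arithmetic in log2 space as a standalone example; it mirrors the idea behind RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL but is not that function's actual implementation.

#include <cassert>
#include <utility>

// EMUL = (EEW / SEW) * LMUL, returned as a (value, is-fractional) pair.
// In log2 space the ratio is a signed sum:
//   log2(EMUL) = log2(EEW) - log2(SEW) + log2(LMUL)
// A negative result means a fractional EMUL (mf2/mf4/mf8).
static std::pair<unsigned, bool>
emulFromRatio(int Log2EEW, int Log2SEW, int Log2LMUL) {
  int Log2EMUL = Log2EEW - Log2SEW + Log2LMUL;
  if (Log2EMUL < 0)
    return {1u << -Log2EMUL, /*Fractional=*/true};
  return {1u << Log2EMUL, /*Fractional=*/false};
}

int main() {
  // Mask operand: EEW = 1 (log2 = 0). With SEW = 8 (log2 = 3) and LMUL = 1
  // (log2 = 0), EMUL = (1/8) * 1 = 1/8, i.e. mf8.
  assert(emulFromRatio(0, 3, 0) == std::make_pair(8u, true));
  // With SEW = 32 (log2 = 5) and LMUL = 4 (log2 = 2): EMUL = (1/32) * 4 = 1/8.
  assert(emulFromRatio(0, 5, 2) == std::make_pair(8u, true));
  return 0;
}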

>From ed785c547cf1ee923a520b2e7c127f15aff46996 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 11 Oct 2024 06:44:00 -0700
Subject: [PATCH 17/17] fixup! remove Vector Single-Width Fractional Multiply
 with Rounding and Saturation from getOperandInfo

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index dbe8bde3b45b52..90af9ef898d951 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -396,10 +396,6 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWSUB_VV:
   case RISCV::VWSUB_VX:
   case RISCV::VWSLL_VI:
-  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
-  // Destination EEW=2*SEW and EMUL=2*EMUL. Source EEW=SEW and EMUL=LMUL.
-  case RISCV::VSMUL_VV:
-  case RISCV::VSMUL_VX:
   // 11.12. Vector Widening Integer Multiply Instructions
   // Source and Destination EMUL=LMUL. Destination EEW=2*SEW. Source EEW=SEW.
   case RISCV::VWMUL_VV:
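
On this last fixup: the removed comment described a 2*SEW destination, but vsmul is a single-width instruction (destination EEW = SEW, as the section heading itself says), so it presumably did not belong in the widening group. With no entry, getOperandInfo should now return an unknown OperandInfo for vsmul operands, and the pass should conservatively skip VL reduction through such uses. A small standalone check of the distinction, for illustration only and not from the patch:

#include <cassert>

// Sketch only: vsmul.vv/vsmul.vx write a single-width result (destination
// EEW = SEW), unlike the widening cases they had been grouped with, which
// report destination EEW = 2 * SEW.
int main() {
  unsigned MILog2SEW = 5;                            // SEW = 32
  unsigned SingleWidthDestEEW = 1u << MILog2SEW;     // 32: what vsmul writes
  unsigned WideningDestEEW = 1u << (MILog2SEW + 1);  // 64: what the group reports
  assert(SingleWidthDestEEW == 32 && WideningDestEEW == 64);
  return 0;
}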


